Visible and Invisible Diasporas: Ethiopian Somalis in the Diaspora Scene
Using American Community Survey data to map Somali and Ethiopian populations in the US. Mapping in R, data analysis in R, migration statistics.
This script accompanies a journal article. Please cite the original article in which the research was reported:
Thompson, Daniel K. 2017. Visible and Invisible Diasporas: Ethiopian Somalis in the Diaspora Scene. Bildhaan: An International Journal of Somali Studies, 17: 1-31
The analysis is divided into two parts based on two publicly available data sources. The files used in this analysis are available HERE.
WARNING: My main focus is not data science and I do not profess to be a wizard in R. There are probably more efficient ways to complete many of the tasks undertaken here.
Part 1: Global migration maps and US statistics.
Data from United Nations, Department of Economic and Social Affairs (2015). Trends in International Migrant Stock: Migrants by Destination and Origin (United Nations database, POP/DB/MIG/Stock/Rev.2015). I appended sheets for each 5-year report together. Please cite UNDESA source.
library(rworldmap)
library(ggplot2)
library(maps)
library(maptools)
library(data.table)
library(dplyr)
Read in data on global migration and subset for emigration from countries in the Horn of Africa:
UN_data<-read.csv("I://Econ_data/Migration_remittances_analyzed/UN_data_1990_2015.csv") #Your filename here
hoa_em<-subset(UN_data, subset=UN_data$origin=="Djibouti"|
UN_data$origin=="Eritrea"|
UN_data$origin=="Ethiopia"|
UN_data$origin=="Kenya"|
UN_data$origin=="Somalia"|
UN_data$origin=="South Sudan"|
UN_data$origin=="Sudan"|
UN_data$origin=="Total")
worldmap<-fortify(spTransform(getMap(),
CRS("+proj=robin +lon_0=11.5 +x0_0
+y0_0 +ellps=WGS84 +datum=WGS84 +units=m +no_defs")))
#Base map
worldmap1<-fortify(spTransform(getMap(),
CRS("+proj=robin +lon_0=11.5 +x0_0
+y0_0 +ellps=WGS84 +datum=WGS84 +units=m +no_defs")))
coordinates(worldmap1)<-c("long", "lat")
colnames(worldmap)[6]<-"region"
a<-hoa_em[hoa_em$origin=="Ethiopia",]
a<-a[,-c(1)]
a_dates<-reshape(a, idvar=c("destination", "countrycode","origin"),
timevar="year",
direction="wide")
colnames(a_dates)[4:9]<-c("x1990","x1995","x2000","x2005","x2010","x2015")
a_dates$x1990<-as.numeric(as.character(a_dates$x1990))
a_dates<-a_dates[order(a_dates$x1990),]
breaks=c(0.9,100,500,1000,5000,10000,20000,50000,250000, Inf)
a_dates$x1990<-cut(a_dates$x1990, breaks=breaks)
labels<-as.character(unique(a_dates$x1990))
labels<-gsub("\(", "", labels)
labels<-gsub(",", "-", labels)
labels<-gsub("e\+03", ",000", labels)
labels<-gsub("]", "", labels)
labels<-gsub("e\+04", "0,000", labels)
labels<-gsub("2.5e\+05", "250,000", labels)
labels<-gsub("e\+06", ",000,000", labels)
labels<-gsub("\.", "", labels)
labels<-gsub("09", "1", labels)
labels<-gsub("100-500", "101-500", labels)
labels<-gsub("500-1,000", "501-1,000", labels)
labels<-gsub("1,000-5,000", "1,001-5,000", labels)
labels<-gsub("5,000-10,000", "5,001-10,000", labels)
labels<-gsub("10,000-20,000", "10,001-20,000", labels)
labels<-gsub("20,000-50,000", "20,001-50,000", labels)
labels<-gsub("50,000-250,000", "50,001-250,000", labels)
labels<-gsub("250,000-Inf", "250,001+", labels)
#Some parameters (can be changed for different countries if you want to go map by map)
country="Ethiopia"
year=1990
yearselect="1990"
colors1<-"YlGn"
ditch_axes<-theme(axis.text = element_blank(),
axis.line = element_blank(),
axis.ticks = element_blank(),
axis.title = element_blank(),
panel.background=element_rect(fill="white"),
legend.key.size=unit(0.35, "cm"))
#Subset for Ethiopia and reshape
a<-hoa_em[hoa_em$origin==country,]
a<-a[,-c(1)]
a_dates<-reshape(a, idvar=c("destination", "countrycode","origin"),
timevar="year",
direction="wide")
colnames(a_dates)[4:9]<-c("x1990","x1995","x2000","x2005","x2010","x2015")
colnames(a_dates)1<-"region"
a_dates$x1990<-as.numeric(as.character(a_dates$x1990))
a_dates$x1995<-as.numeric(as.character(a_dates$x1995))
a_dates$x2000<-as.numeric(as.character(a_dates$x2000))
a_dates$x2005<-as.numeric(as.character(a_dates$x2005))
a_dates$x2010<-as.numeric(as.character(a_dates$x2010))
a_dates$x2015<-as.numeric(as.character(a_dates$x2015))
a_dates$region<-gsub("United\sKingdom.*", "United Kingdom", a_dates$region)
a_dates$region<-gsub("United\sStates\sof\sAmerica", "United States of America", a_dates$region)
a_dates$region<-gsub("Russian\sFederation", "Russia", a_dates$region)
a_dates$region<-gsub("Venezuela.*", "Venezuela", a_dates$region)
a_dates$region<-gsub("Bolivia.*", "Bolivia", a_dates$region)
a_dates$region<-gsub("Syria.*", "Syria", a_dates$region)
a_dates$region<-gsub("Iran.*", "Iran", a_dates$region)
a_dates$region<-gsub("Republic\sof\sKorea", "South Korea", a_dates$region)
a_dates$region<-gsub("Democratic\sPeople's\sRepublic\sof\sKorea", "North Korea", a_dates$region)
a_dates$region<-gsub("Viet\sNam", "Vietnam", a_dates$region)
a_dates$region<-gsub("Democratic\sRepublic\sof\sthe\sCongo", "DRC", a_dates$region)
a_dates$region<-gsub("Congo", "Republic of Congo", a_dates$region)
a_dates$region<-gsub("DRC", "Democratic Republic of the Congo", a_dates$region)
a_dates$region<-gsub(".*Ivoire", "Ivory Coast", a_dates$region)
#for pre-2010 data, make South Sudan the same as Sudan
a_dates[a_dates$region=="South Sudan",]$x1990<-a_dates[a_dates$region=="Sudan",]$x1990
a_dates[a_dates$region=="South Sudan",]$x1995<-a_dates[a_dates$region=="Sudan",]$x1995
a_dates[a_dates$region=="South Sudan",]$x2000<-a_dates[a_dates$region=="Sudan",]$x2000
a_dates[a_dates$region=="South Sudan",]$x2005<-a_dates[a_dates$region=="Sudan",]$x2005
#Convert to data table for easy join (more important if you keep multiple countries in the subset)
a_dates<-as.data.table(a_dates)
worldmap<-as.data.table(worldmap)
worldmap$region<-as.character(worldmap$region)
a_dates$region<-as.character(a_dates$region)
Z<-inner_join(worldmap, a_dates, by = "region", copy = TRUE)
Z<-as.data.table(Z)
#Get coordinates and project
Z$lat<-as.numeric(Z$lat)
Z$long<-as.numeric(Z$long)
Z$lat<-unlist(Z$lat)
coordinates(Z)<-c("long", "lat")
proj4string(Z)<-CRS("+proj=robin +ellps=WGS84")
colnum<-10 #This is the x1990 column, for year 1990. Select year as appropriate
colnames(Z@data)[colnum]<-"select"
#Background map
gg<-ggplot()
gg<-gg + geom_map(data=as.data.frame(worldmap1),map=as.data.frame(worldmap),
aes(x=long, y=lat, map_id=id),
color="black", fill="white",
size=0.25)
gg<-gg + coord_equal()
gg<-gg+ditch_axes
Z$select<-as.numeric(as.character(Z$select))
Z$select_cat<-cut(Z$select, breaks=breaks)
Zselect<-Z[!is.na(Z$select_cat),]
Zselect<-Zselect[order(Zselect$select),]
gg<-gg+geom_polygon(data=as.data.frame(Zselect),
aes(x=long, y=lat, group=group,
fill=select_cat)
, color="black", lwd=0.01)
gg<-gg+scale_x_discrete(drop=FALSE,breaks=breaks)
gg<-gg+scale_fill_brewer(drop=FALSE,palette=colors1, name=title,
guide=guide_legend(reverse=TRUE),
labels=labels, na.value="white")
gg<-gg+theme(panel.background=element_rect(fill="white"))+
theme(legend.key.size=unit(0.35,"cm"))
gg<-gg+coord_fixed(xlim=c(-13000000,14000000),
ylim=c(-6000000,8000000))
gg
Easy enough, I think. You should be able to see where to change the country and the dates. For outputting multiple maps, a loop can be used, which would follow the same procedure but start something like this:
countries<-unique(as.character(hoaem$origin))
for(countrynum in c(1:8)){
year<-seq(from=1990, to=2015, by=5)
year<-as.character(year)
for(yearnum in c(1:6)){
yearselect=year[yearnum]
#Specify output filenames
title=countries[countrynum]
title=gsub("$", "-born\npopulation, ", title)
title=gsub("$", yearselect, title)
filename=countries[countrynum]
filename=gsub("$", yearselect, filename)
filename=gsub("$", ".jpg", filename)
filename=gsub("^", "colormap", filename)
#Specify color scheme for different origin countries
colors1=countries[countrynum]
colors1=gsub("Ethiopia", "YlGn", colors1)
colors1=gsub("Eritrea", "RdPu", colors1)
colors1=gsub("Somalia", "Blues", colors1)
colors1=gsub("Kenya", "Purples", colors1)
colors1=gsub("Djibouti", "Oranges", colors1)
colors1=gsub("Sudan", "Oranges", colors1)
colors1=gsub("Uganda", "YlGnBu", colors1)
colors1=gsub("South Sudan", "YlGnBu", colors1)
a=hoa_em[hoa_em$origin==countries[countrynum],]
a=a[,-c(1)]
a_dates=reshape(a, idvar=c("destination", "countrycode","origin"),
timevar="year",
direction="wide")
#Then you would basically follow the same procedure as done for Ethiopia above, ending with a plot entitled gg. You can then output as jpeg:
jpeg("filename", dimensions)
print(gg)
dev.off()
}}
And now for the chart (Figure 3): We just select the immigration data for the U.S.
US<-hoa_em[hoa_em$destination=="United States of America",]
US<-US[US$origin=="Ethiopia"|
US$origin=="Kenya"|
US$origin=="Somalia",]
gg<-ggplot(US, aes(x=year, y=persons, group=origin))+
geom_line(aes(color=origin), lwd=0.75)+
scale_color_manual(values=c("grey70", "grey50", "grey10"),
name="Origin")+
theme(panel.background=element_rect(fill="white"))+
labs(x="Year", y="Persons (thousands)")+
scale_y_continuous(breaks=c(25000,50000,75000,100000,125000,150000,175000),
labels=c("25", "50", "75", "100", "125", "150", "175"))
gg
# Part 2: American Communities Survey Data
Data for this section were downloaded directly into R using the acs.r package, as described in the publication. You must register as a developer (which is free) to obtain an API code which will allow you to ping the US Census Bureau website and download data through r. Copy and paste the API code into R.
The basic script for direct data download is:
library(acs)
api.key.install("YOUR (OBNOXIOUSLY LENGHTY) API KEY HERE")
api.key.install("fcdcb277a9a8a3285397e6f61bb5f1eb02404f8a")
county.df = map_data("county")
county.df = lapply(county.df, function(y) gsub("de kalb", "dekalb", y)) #(DeKalb, GA is an important county for my analysis)
county.df = as.data.frame(county.df)
names(county.df)[5:6]=c("state", "county")
state.df=map_data("state")
us.county=geo.make(state="", county="")
B04004_2009<-acs.fetch(endyear=2009, geography = us.county,
table.number="B04004", col.names="pretty")
B04005_2009<-acs.fetch(endyear=2009, geography = us.county,
table.number="B04005", col.names="pretty")
And so on, selecting tables as needed. The data are again available from the "Capital in the Borderlands" dataverse site linked above. However, please cite ACS data files from the US Census website as well. I also imagine anyone using this data might want to update to include the 2016 data.
Here are the general data. The data are a county map of the US, so first we will need to use the !duplicated() function to do the analysis county by county.
a<-read.csv("I://Econ_data/US_imm_Africa/RevisedData/Bildhaan_data1.csv")
#For analysis (not mapping):
a<-a[,-c(1)]
a<-a[!duplicated(a[,c(1,2)]),]
a<-a[,-c(3:7)]
a<-melt(a, id.vars=c("state", "county"))
a$year<-gsub(".*\_2", "2", a$variable)
a$variable<-gsub("\2.*", "", a$variable)
a$type<-gsub(".*\", "", a$variable)
colnames(a)[3]="origin"
a$origin = gsub("\_.*", "", a$origin)
a<-reshape(a, direction="wide",
idvar=c("state", "county",
"origin", "year" ),
timevar="type")
a$Proportion<-a$value.ancestry/a$value.born
a<-do.call(data.frame, lapply(a, function(x) replace(x, is.infinite(x), NA)))
#For some reason this generates a few null rows. Probably my fault.
a<-a[!is.na(a$state),]
for(country in c("Ethiopia", "Kenya", "Somalia")){
select<-a[a$origin==country,]
for(year in c(2009, 2010, 2011, 2013, 2014, 2015)){
print(summary(select[select$year==year,]$Proportion))
}}
That should get you Table 2 in the publication.
Then:a<-reshape(a, direction="wide",
idvar=c("state", "county",
"year" ),
timevar="origin")
someth<-a[a$value.ancestry.Somalia>=50 &
a$value.born.Ethiopia>=50,]
somken<-a[a$value.ancestry.Somalia>=50 &
a$value.born.Kenya>=50,]
#Remove outlier: Clayton County, GA
someth = someth[order(-someth$Proportion.Somalia),]
someth = someth[-c(1),]
somken = somken[order(-somken$Proportion.Somalia),]
somken = somken[-c(1),]
for(year in c(2009,2010,2011,2013,2014,2015)){
print(summary(someth[someth$year==year & !is.na(someth$Proportion.Ethiopia),]$Proportion.Ethiopia))
print(summary(someth[someth$year==year & !is.na(someth$Proportion.Somalia),]$Proportion.Somalia))
}
for(year in c(2009,2010,2011,2013,2014,2015)){
print(summary(somken[somken$year==year,]$Proportion.Kenya))
print(summary(somken[somken$year==year,]$Proportion.Somalia))
}
The 2015 numbers from the above calculation were used for Table 3.
Now we subset further for the comparison visualized in Figure 4 and the regressions reported in the caption.
someth2015<-someth[someth$year==2015,]
plot1<-ggplot(someth2015, aes(x=Proportion.Ethiopia, y=Proportion.Somalia))+
geom_point(size=3,fill="grey50", pch=21)+
geom_smooth(method="lm")+
geom_hline(yintercept=1.835, linetype="dashed")+
geom_vline(xintercept=1.364, linetype="dashed")+
labs(x="Proportion of Ethiopian descent\nto Ethiopian birth",
y="Proportion of Somali descent\nto Somalian birth")+
theme(panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),
panel.background=element_blank(),
axis.line=element_line(color="black"),
legend.key=element_blank())+
scale_y_continuous(breaks=c(1,2,3,4,5,6))+
scale_x_continuous(breaks=c(1,2,3,4,5,6))
plot1
## Warning: Removed 283 rows containing non-finite values (stat_smooth).
## Warning: Removed 283 rows containing missing values (geom_point).
m<-lm(Proportion.Somalia~Proportion.Ethiopia, data=someth)
summary(m)
To create maps we'll go back to the full dataset, and we'll take a little different track to get the stats since we only need the 2015 proportions.
a<-read.csv("I://Econ_data/US_imm_Africa/RevisedData/Bildhaan_data1.csv")
a<-a[,-c(1)]
coordinates(a)<-c("long", "lat")
proj4string(a)<-CRS("+proj=longlat +ellps=WGS84")
a<-spTransform(a,
CRS("+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-97.5
+x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=m"))
a@data[,1]=1:nrow(a)
names(a)1="id"
amap<-as.data.frame(a)
amap<-amap[order(amap$order),]
amap<-fortify(amap)
amap<-do.call(data.frame, lapply(amap, function(x)
replace(x, is.infinite(x),NA)))
someth<-amap[amap$Somalia_ancestry_2015>=50 &
amap$Ethiopia_born_2015>=50,]
someth<-someth[!is.na(someth$id),]
someth$Prop_desc_bn_som2015 = someth$Somalia_ancestry_2015/someth$Somalia_born_2015
someth$Prop_desc_bn_eth2015 = someth$Ethiopia_ancestry_2015/someth$Ethiopia_born_2015
library(rgdal)
library(rgeos)
usstates2<-readOGR(dsn="I://R/US_Census/GeoJSON/states.geojson",layer="OGRGeoJSON")
#Set Albers equal-area projection parameters
aea.proj2 <- "+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-97.5
+x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=m"
usstatesmap2<-spTransform(usstates2,CRS(aea.proj2))
usstatesmap2<-SpatialPolygonsDataFrame(gBuffer(usstatesmap2, byid=TRUE,
width=0),
data=usstatesmap2@data)
usstatesmap2_map<-fortify(usstatesmap2, region="GEOID")
#Here I manually put in the quartile and mean breaks to color the map
sombreaks1=c(0,0.5,1,1.228,1.55,1.835,2.094,Inf)
ethbreaks=c(0,0.5,1,1.069,1.282,1.364,1.599,Inf)
someth$Prop_desc_bn_som2015<-cut(someth$Prop_desc_bn_som2015,
breaks=sombreaks1)
someth$Prop_desc_bn_eth2015<-cut(someth$Prop_desc_bn_eth2015,
breaks=ethbreaks)
ditch_axes<-theme(axis.text = element_blank(),
axis.line = element_blank(),
axis.ticks = element_blank(),
axis.title = element_blank(),
panel.background=element_rect(fill="white"),
legend.key.size=unit(0.35, "cm"))
someth=someth[order(someth$Prop_desc_bn_som2015),]
labels=as.character(unique(someth$Prop_desc_bn_som2015))
labels=gsub("\(", "", labels)
labels=gsub("\]", "", labels)
labels=gsub(",", "-", labels)
labels=gsub("-Inf", "+", labels)
someth=someth[order(someth$order),]
ggus<-ggplot()+
geom_polygon(data=amap, aes(x=long, y=lat, group=group),
color="darkgrey", fill="white", size=0.1)+
ditch_axes+theme(panel.background=element_rect(fill="white"))
ggus<-ggus+geom_polygon(data=someth, aes(x=long,y=lat,group=group,
fill=Prop_desc_bn_som2015),
color="black", size=0.1)+
scale_fill_brewer(drop=FALSE,palette="RdYlGn",
guide=guide_legend(reverse=TRUE),
labels=labels, na.value="white")
ggus<-ggus+geom_map(data=usstatesmap2_map, map=usstatesmap2_map,
aes(x=long,y=lat, map_id=id),
color="black",fill=NA,lwd=0.2)+
coord_fixed(xlim=c(-2500000,2200000),
ylim=c(-1200000,1500000))
## Warning: Ignoring unknown aesthetics: x, y
ggus
Repeat for the Ethiopia proportions.
Now multiple ancestry data :
b<-read.csv("I://Econ_data/US_imm_Africa/RevisedData/Bildhaan_data2.csv")
b$totalanc<-b$value.MultAn+b$value.SingleAn
b$Prop_multanc<-b$value.MultAn/b$totalanc
b<-reshape(b, direction="wide",
idvar=c("ID", "state", "county", "year"),
timevar="origin")
We will return to this b dataset later for some graphs. For now, we'll modify it into an object named “b2” so we can do some basic analysis.
b2<-reshape(b, direction="wide",
idvar=c("ID", "state", "county"),
timevar="year")
b2$PC_EthSingle_2009_2015<-(b2$value.SingleAn.Ethiopia.2015-b2$value.SingleAn.Ethiopia.2009)/b2$value.SingleAn.Ethiopia.2009
b2$PC_SomSingle_2009_2015<-(b2$value.SingleAn.Somali.2015-b2$value.SingleAn.Somali.2009)/b2$value.SingleAn.Somali.2009
b2$PC_SomMult_2009_2015<-(b2$value.MultAn.Somali.2015-b2$value.MultAn.Somali.2009)/b2$value.MultAn.Somali.2009
b2$PC_EthMult_2009_2015<-(b2$value.MultAn.Ethiopia.2015-b2$value.MultAn.Ethiopia.2009)/b2$value.MultAn.Ethiopia.2009
b2<-do.call(data.frame, lapply(b2, function(x) replace(x, is.infinite(x), NA)))
summary(b2$PC_EthSingle_2009_2015)
summary(b2$PC_SomSingle_2009_2015)
summary(b2$PC_EthMult_2009_2015)
summary(b2$PC_SomMult_2009_2015)
The above yields the results reported in Table 4. To look at particular metro areas that I am interested in, I subset again:
b3<-b2[b2$county=="los angeles"
|b2$county=="dekalb"
|b2$county=="montgomery"
|b2$county=="fairfax"
|b2$county=="hennepin"
|b2$county=="ramsey"
|b2$county=="clark"
|b2$county=="dallas"
|b2$county=="king",]
b3 = b3[!b3$state=="tennessee",]
b3 = b3[!b3$state=="ohio",]
summary(b3$PC_EthSingle_2009_2015)
summary(b3$PC_EthMult_2009_2015)
summary(b3$PC_SomSingle_2009_2015)
summary(b3$PC_SomMult_2009_2015)
The above yields Table 5.
Now, to graph the changes in multiple ancestry over time for these two datasets (b2 and b3), we will just need the means of each column and then reorganize a little to graph with ggplot.
library(data.table)
b2["Means",]<-c("All", "All", "All", colMeans(b2[,4:91], na.rm=TRUE))
#This will generate some warnings since some of the columns are not numeric.
b2<-melt(b2, id.vars=c("state", "county", "ID"))
b2<-b2[b2$ID=="All",]
b2<-b2[,-c(1:2)]
b2$year<-gsub(".*\.2", "2", b2$variable)
b2$variable<-gsub("\.2.*", "", b2$variable)
b2$variable<-gsub("value\.", "", b2$variable)
b2$origin<-gsub(".*\.", "", b2$variable)
b2$variable<-gsub("\..*", "", b2$variable)
b2$value<-as.numeric(as.character(b2$value))
gg<-ggplot(b2, aes(x=year, y=value, group=origin))+
geom_line(data=b2[b2$variable=="Prop_multanc",], aes(linetype=origin),lwd=1.5)+
scale_linetype_manual(values=c("dashed", "solid"),
name="Ancestry",
labels=c("Ethiopian", "Somali"))+
theme(panel.background=element_rect(fill="white"),
legend.key = element_blank(),
legend.position="none")+
labs(x="Year", y="Proportion reporting\nmultiple ancestry")+
geom_text(aes(label="Ethiopian"), x=3.8, y=0.1)+
geom_text(aes(label="Somali"),x=2.5, y=0.12)
gg
The above is Figure 6. The same thing can be done with the data “b3” to produce Figure 7.
2009 graph of multiple ancestry populations (Figure 8, top)
b2009<-b[b$year==2009,]
b2009<-b2009[b2009$totalanc.Ethiopia>0 &
b2009$totalanc.Somali>0,]
breaks=c(50,74,99,199,499,999,4999,9999,Inf)
labels=c("50-74", "75-99", "100-199", "200-499", "500-999", "1,000-4,999",
"5,000-9,999", "10,000+")
bnames2009<-b2009[b2009$value.MultAn.Somali>=75 |
b2009$value.MultAn.Ethiopia>=200,]
bnames2009<-bnames2009[bnames2009$value.MultAn.Somali>0 &
bnames2009$value.MultAn.Ethiopia>0,]
bnames2009$county<-gsub("los angeles", "L.A.", bnames2009$county)
gg2009<-ggplot(b2009[b2009$value.MultAn.Ethiopia>0 &
b2009$value.MultAn.Somali>0, ], aes(x=value.MultAn.Ethiopia,
y=value.MultAn.Somali))+
geom_point(aes(fill=cut(totalanc.Somali, breaks=breaks)),size=3, color="black", pch=21)+
geom_smooth(method="lm")+
scale_fill_brewer(drop=FALSE,palette="Greys",
guide=guide_legend(reverse=TRUE),
labels=labels,
name="Population reporting\nSomali ancestry")+
labs(x="Population reporting multiple Ethiopian ancestry",
y="Population reporting multiple Somali ancestry")+
geom_text(data=bnames2009, aes(x=value.MultAn.Ethiopia,
y=value.MultAn.Somali,
label=toupper(county)),
hjust = 0.5, vjust=-0.5)+
theme(panel.background=element_rect(fill="white"),
legend.key=element_blank())
gg2009
2015 graph (Figure 8, bottom):
b2015<-b[b$year==2015,]
b2015<-b2015[b2015$totalanc.Ethiopia>0 &
b2015$totalanc.Somali>0,]
bnames2015<-b2015[b2015$value.MultAn.Somali>=250 |
b2015$value.MultAn.Ethiopia>=300,]
gg2015<-ggplot(b2015[b2015$value.MultAn.Somali>0 &
b2015$value.MultAn.Ethiopia>0,],
aes(x=value.MultAn.Ethiopia,
y=value.MultAn.Somali))+
geom_point(aes(fill=cut(totalanc.Somali, breaks=breaks)),size=3, color="black", pch=21)+
geom_smooth(method="lm")+
scale_fill_brewer(palette="Greys",
guide=guide_legend(reverse=TRUE),
labels=labels,
name="Population reporting\nSomali ancestry")+
labs(x="Population reporting multiple Ethiopian ancestry",
y="Population reporting multiple Somali ancestry")+
geom_text(data=bnames2015, aes(x=value.MultAn.Ethiopia,
y=value.MultAn.Somali,
label=toupper(county)),
hjust = 0.5, vjust=-0.5)+
theme(panel.background=element_rect(fill="white"),
legend.key=element_blank())
gg2015
And I think that's about it for now.