%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
#import seaborn as sns
#from scipy.stats import ks_2samp
#import glob
#import time
import requests
import bs4
#import lxml
import altair as alt
import folium
import geopandas as gdp
import math
import json
#from mpl_toolkits import mplot3d
import plotly
import plotly.express as px
import plotly.graph_objects as go
from ipywidgets import interact


# Load every raw input file from the working directory.
# CSV sources
co2 = pd.read_csv('co2.csv')
temp = pd.read_csv('temperatures.csv')
crop = pd.read_csv('crops.csv')
geoinfo = pd.read_csv('district wise centroids.csv')
# Excel sources; the cotton workbook is read with its first row skipped.
pop = pd.read_excel(
    'Population Estimates for Districts and PCs in India, 2020.xlsx'
)
cs = pd.read_excel('cotton-180.xlsx', skiprows=1)


# Scrape the sector-wise GDP table (resource [3]) into the DataFrame `mydata`.
response = requests.get(
    'https://statisticstimes.com/economy/country/india-gdp-sectorwise.php#:~:text=Sector%2Dwise%20GDP%20in%20India,Financial%2C%20real%20estate%20%26%20prof%20servs',
    timeout=30,  # never hang the notebook on an unresponsive server
)
# Fail here on an HTTP error instead of parsing an error page below.
response.raise_for_status()
# Name the parser explicitly so the parse does not depend on which optional
# parsers happen to be installed (and bs4 does not warn about guessing).
sector = bs4.BeautifulSoup(response.text, 'html.parser')
tab = sector.find('table', attrs={'id': 'table_id4'})
if tab is None:
    raise ValueError("table with id 'table_id4' not found in the downloaded page")

headers = [th.text for th in tab.find_all('th')]
# Every one of the first seven header names is emitted twice (the second copy
# suffixed with '1') and the very first entry is dropped.
# NOTE(review): presumably each header cell spans two data columns - confirm
# against the live page.
head = [name for h in headers[:7] for name in (h, h + '1')][1:]

# Build the frame in one go; the first two <tr> rows are skipped.
rows = [[td.text for td in tr.find_all('td')] for tr in tab.find_all('tr')[2:]]
mydata = pd.DataFrame(rows, columns=head)

# Keep the year and sector-share columns; .copy() makes the column assignment
# below act on an independent frame (no SettingWithCopyWarning).
mydata = mydata[['Year1', 'Agriculture & Allied', 'Agriculture',
                 'Industry', 'Mining & Quarrying', 'Manufacturing',
                 'Services']].copy()
# The first four characters of 'Year1' are parsed as the integer year.
mydata['Year1'] = mydata['Year1'].str[:4].astype('int')
# Largest year first, indexed by year.
mydata = mydata.sort_values(by='Year1', ascending=False).set_index('Year1')
# Hand-entered percentage shares; two of the entries are sums of two parts.
value = [18.2, 15.42 + 19.53, 22.05, 2.7 + 5.16, 12.89]
# Append whatever is left of 100, then order the shares from smallest to largest.
value = sorted(value + [100 - np.sum(value)])
# Display the first row of `mydata` (the largest 'Year1' after the descending sort).
mydata.iloc[0]

Agriculture & Allied    18.20
Agriculture             15.79
Industry                24.77
Mining & Quarrying       2.13
Manufacturing           12.89
Services                57.03
Name: 2013, dtype: object


# Weighted pie: each share drives both the arc angle and the arc radius.
pie_categories = [
    'Other',
    '1.Construction',
    '2.Manufacturing',
    '3.Agriculture & Allied',
    '4.Financial, real-estate, etc',
    '5.Other servs',
]
source = pd.DataFrame({"category": pie_categories, "values": value})

angle = alt.Theta("values:Q", stack=True)
# Square-root radius scale starting at zero, never smaller than 20 px.
reach = alt.Radius("values", scale=alt.Scale(type="sqrt", zero=True, rangeMin=20))
base = alt.Chart(source).encode(theta=angle, radius=reach, color="category:N")

# Arcs with a white outline, plus the numeric share printed just outside each arc.
c1 = base.mark_arc(innerRadius=20, stroke="#fff")
c2 = base.mark_text(radiusOffset=10).encode(text="values:Q")

c1 + c2


# Population dependency pie chart (hand-entered shares per sector).
dependency = {
    "category": ["Agriculture", "Industry", "Service"],
    "value": [70, 12, 18],
}
source = pd.DataFrame(dependency)

base = alt.Chart(source).encode(
    theta=alt.Theta("value:Q", stack=True),
    color=alt.Color("category:N", legend=None),
)

# The legend is disabled, so the category names are drawn outside the pie instead.
pie = base.mark_arc(outerRadius=120)
text = base.mark_text(radius=150, size=12).encode(text="category:N")

pie + text


# Prepare district- and state-level population tables for the map.
district_col = 'District Name, State Letter Code'
# Keep only the text before the first comma; the .str accessor passes missing
# values through instead of raising like a per-row lambda would.
pop[district_col] = pop[district_col].str.split(',').str[0]
# Districts present in both the population table and the centroid table.
toge = pop.merge(geoinfo, left_on=district_col, right_on='District', how='inner')

# Sum numeric columns only: pandas >= 2.0 no longer drops non-numeric columns
# silently, and the district-name strings must not be "summed".
s_pop = pop.groupby('State Name').sum(numeric_only=True).reset_index()
geodf = gdp.read_file('https://raw.githubusercontent.com/Subhash9325/GeoJson-Data-of-Indian-States/master/Indian_States')

# Rename states to the spellings on the right-hand side.
# NOTE(review): presumably these are the NAME_1 values of the GeoJSON file - confirm.
s_pop['State Name'] = s_pop['State Name'].replace({
    'Andaman & Nicobar Islands': 'Andaman and Nicobar',
    'Jammu & Kashmir': 'Jammu and Kashmir',
    'NCT of Delhi': 'Delhi',
    'Odisha': 'Orissa',
    'Uttarakhand': 'Uttaranchal',
    'Dadra and Nagar Haveli and Daman and Diu': 'Dadra and Nagar Haveli',
})
s_pop.head()


# Population per state, keyed by state name.
s_pop2 = s_pop.set_index('State Name')['District Population']

# Centre the map on the mean of the merged district coordinates.
center_lat = toge['Latitude'].mean()
center_long = toge['Longitude'].mean()
m = folium.Map(location=[center_lat, center_long], tiles='cartodbpositron')

# Fixed viewport around that centre.
south_west = (center_lat - 15, center_long - 19)
north_east = (center_lat + 12, center_long + 19)
folium.FitBounds([south_west, north_east]).add_to(m)

url = "https://raw.githubusercontent.com/Subhash9325/GeoJson-Data-of-Indian-States/master/Indian_States"

import branca

steps = s_pop2.to_numpy()
fractions = np.arange(0, 1, 0.001)
# NOTE(review): both arguments are passed positionally to to_step - confirm
# they bind to the parameters that were intended.
palette = branca.colormap.linear.YlOrBr_05
colorscale = palette.to_step(steps, fractions).scale(1e+04, 1.2e+08)
def style_function(feature):
    """Return the style dict for one GeoJSON feature of the state map.

    The feature's ``NAME_1`` property is looked up in ``s_pop2``.  Features
    with no entry are filled black; all others are filled with
    ``colorscale(value)``.
    """
    popu = s_pop2.get(feature['properties']['NAME_1'])
    return {
        "fillOpacity": 0.6,
        "weight": 0,
        # "black" is a valid CSS colour name; the former "#black" was neither
        # a valid hex code nor a valid name.
        "fillColor": "black" if popu is None else colorscale(popu),
    }

# Download the state outlines and add them to the map as a styled layer.
geojson_response = requests.get(url, timeout=30)  # do not hang on a dead server
# Raise on an HTTP error rather than handing an error page to json.loads.
geojson_response.raise_for_status()
folium.GeoJson(
    json.loads(geojson_response.text),
    style_function=style_function,
).add_to(m)

m
#folium.TileLayer(tiles = 'MapQuest Open Aerial', attr = 'https://www.mapquest.com/').add_to(m)


from IPython.display import Image

# Embed altitude.png (path relative to the working directory) at 600 x 800.
Image(
    filename='altitude.png',
    width=600,
    height=800,
)


# Ten crops with the highest mean production over crop years 2010-2014.
in_window = (crop['Crop_Year'] > 2009) & (crop['Crop_Year'] < 2015)
# Average only the 'Production' column: averaging the whole frame fails on the
# text columns under pandas >= 2.0.
mean_production = crop[in_window].groupby('Crop')['Production'].mean()
# Drop the 'Total foodgrain' row *before* ranking so that exactly ten crops are
# returned and nothing raises when that row is absent from the top of the list.
top_ten = mean_production.drop('Total foodgrain', errors='ignore').nlargest(10).index

# All rows (every year) that belong to those ten crops.
recent_top_10 = crop.set_index("Crop").loc[top_ten].reset_index()

# Mean production per (year, crop) inside the window; rows with any missing
# value are discarded first.
recent_window = (recent_top_10['Crop_Year'] > 2009) & (recent_top_10['Crop_Year'] < 2015)
recent_five_year = (
    recent_top_10[recent_window]
    .dropna()[['Crop_Year', 'Crop', 'Production']]
    .groupby(['Crop_Year', 'Crop'])
    .mean()
    .reset_index()
)
recent_five_year['Crop_Year'] = recent_five_year['Crop_Year'].astype('string')
# Cube-root transform, vectorised over the column.
recent_five_year['Production'] = recent_five_year['Production'] ** (1 / 3)

# Horizontal stacked bars: one bar per crop year, one coloured segment per crop.
production_axis = alt.X('Production', title = 'Cube Root Production')
stack_bar = (
    alt.Chart(recent_five_year)
    .mark_bar()
    .encode(x=production_axis, y='Crop_Year', color='Crop')
    .properties(title = 'stack bar chart')
)
stack_bar


# Long-format temperature table: one row per (YEAR, category).
temperature_columns = ['YEAR', 'ANNUAL', 'JAN-FEB', 'MAR-MAY', 'JUN-SEP', 'OCT-DEC']
tem = temp[temperature_columns].melt(
    id_vars='YEAR', var_name='category', value_name='temperature'
)

# Selection that follows the mouse and snaps to the nearest YEAR value.
nearest = alt.selection(
    type='single',
    nearest=True,
    on='mouseover',
    fields=['YEAR'],
    empty='none',
)

# Layer 1: one smoothed line per category.
line = alt.Chart(tem).mark_line(interpolate='basis').encode(
    x=alt.X('YEAR:Q'),
    y=alt.Y('temperature:Q', scale=alt.Scale(zero=False)),
    color='category:N',
)

# Layer 2: invisible points spanning the chart; they carry the selection and
# so determine the x-value under the cursor.
selectors = (
    alt.Chart(tem)
    .mark_point()
    .encode(x='YEAR:Q', opacity=alt.value(0))
    .add_selection(nearest)
)

# Layer 3: points on the lines, shown only at the selected year.
points = line.mark_point().encode(
    opacity=alt.condition(nearest, alt.value(1), alt.value(0))
)

# Layer 4: temperature labels next to the selected points.
text = line.mark_text(align='left', dx=5, dy=-5).encode(
    text=alt.condition(nearest, 'temperature:Q', alt.value(' '))
)

# Layer 5: vertical rule at the selected year.
rules = (
    alt.Chart(tem)
    .mark_rule(color='gray')
    .encode(x='YEAR:Q')
    .transform_filter(nearest)
)

# Combine the five layers into one chart.
alt.layer(line, selectors, points, rules, text).properties(width=600, height=300)


# Join the CO2 and temperature tables on year, keeping the three plotted columns.
co2_temp = co2.merge(temp, left_on='Year', right_on='YEAR')
co2_temp = co2_temp[['YEAR', 'C02_Emission_Kt', 'ANNUAL']]

# Small markers coloured by the ANNUAL column on the 'amp' colour scale.
marker_style = dict(
    size=2,
    color=co2_temp['ANNUAL'],
    colorscale='amp',
    opacity=0.8,
)
scatter = go.Scatter3d(
    x=co2_temp['YEAR'],
    y=co2_temp['C02_Emission_Kt'],
    z=co2_temp['ANNUAL'],
    mode='markers',
    marker=marker_style,
)

fig = go.Figure(data=[scatter])
fig.show()


# Build one interactive line chart per top-ten crop, stored by crop name.
states_top_ten = {}
# From here on `crop` only contains rows without missing values.
crop = crop.dropna()
for i in top_ten:
    # Total production per (state, year) for the current crop.
    wheat = (
        crop.loc[crop['Crop'] == i, ['State_Name', 'Crop_Year', 'Production']]
        .groupby(['State_Name', 'Crop_Year'])
        .sum()
        .reset_index()
    )

    # Keep only the states whose best year exceeds half of the average
    # per-state mean production.
    cutoff = np.mean(wheat.groupby('State_Name')['Production'].mean()) / 2
    state_peak = wheat.groupby('State_Name').max()
    get_rid = state_peak[state_peak['Production'] > cutoff].index
    wheat = wheat.set_index('State_Name').loc[get_rid].reset_index()

    # Hovering highlights the nearest state's line.
    highlight = alt.selection(
        type='single', on='mouseover', fields=['State_Name'], nearest=True
    )

    base = alt.Chart(wheat).encode(
        x=alt.X('Crop_Year:Q', scale=alt.Scale(zero=False)),
        y=alt.Y('Production:Q'),
        color='State_Name:N',
    )

    # Invisible circles carry the selection.
    points = (
        base.mark_circle()
        .encode(opacity=alt.value(0))
        .add_selection(highlight)
        .properties(width=600)
    )

    # Lines are drawn at size 3 when selected and size 1 otherwise.
    lines = base.mark_line().encode(
        size=alt.condition(~highlight, alt.value(1), alt.value(3))
    )

    states_top_ten[i] = points + lines
#interact
def inter(crop = 'Coconut '):
    """Look up and return the entry stored under ``crop`` in ``states_top_ten``.

    Raises ``KeyError`` when ``crop`` is not one of its keys.
    """
    chart = states_top_ten[crop]
    return chart
# Display the crop names that are valid arguments for inter().
top_ten

Index(['Coconut ', 'Sugarcane', 'Wheat', 'Jute', 'Rice', 'Cotton(lint)',
       'Mango', 'Potato', 'Paddy', 'Soyabean'],
      dtype='object', name='Crop')


# The keyword must match inter()'s parameter name (`crop`); the former `x=`
# matched nothing, so a free-text box was rendered instead of a crop picker.
# A plain list of options makes ipywidgets build a dropdown.
interact(inter, crop=list(top_ten));

interactive(children=(Text(value='Coconut ', description='crop'), Output()), _dom_classes=('widget-interact',)…


# Cotton(lint) rows for Gujarat, split by crop year.
gujarat_cotton = (crop['State_Name'] == 'Gujarat') & (crop['Crop'] == 'Cotton(lint)')
Gujarat_2011 = crop[gujarat_cotton & (crop['Crop_Year'] == 2011)]
Gujarat_2012 = crop[gujarat_cotton & (crop['Crop_Year'] == 2012)]
Gujarat_2012.head()


# Grouped bar chart: Gujarat cotton production per district, 2011 next to 2012.
two_year = crop[(crop['State_Name'] == 'Gujarat')
                & (crop['Crop'] == 'Cotton(lint)')
                & crop['Crop_Year'].isin([2011, 2012])]

# Districts in order of first appearance; this order drives both the bars and
# the tick labels.
districts = two_year['District_Name'].unique()
# One row per district, one column per year.  Aligning on the district name
# (instead of relying on row position in separately filtered frames) keeps each
# bar under its own label even when a district is missing in one of the years
# or has more than one row per year; missing combinations are plotted as 0.
by_year = (
    two_year.pivot_table(index='District_Name', columns='Crop_Year',
                         values='Production', aggfunc='sum')
    .reindex(index=districts, columns=[2011, 2012])
    .fillna(0)
)
X = [name[:6] for name in districts]  # tick labels, truncated to 6 characters
Y = by_year[2011]
Z = by_year[2012]

X_axis = np.arange(len(X))
plt.figure(figsize=(20, 8))
# Two bars of width 0.4 per district, shifted left/right of the tick.
plt.bar(X_axis - 0.2, Y, 0.4, label = '2011')
plt.bar(X_axis + 0.2, Z, 0.4, label = '2012')

plt.xticks(X_axis, X)
plt.xlabel("Districts")
plt.ylabel("Production")
plt.title("Production distribution")
plt.legend()

plt.show()


# Per-district change between the two years: 2011 value minus 2012 value
# (the `_x` columns come from Gujarat_2011, the `_y` columns from Gujarat_2012).
effect = Gujarat_2011.merge(Gujarat_2012, on = 'District_Name')
effect['dif_prod'] = effect['Production_x'] - effect['Production_y']
effect['dif_area'] = effect['Area_x'] - effect['Area_y']
# .copy() makes the assignments below act on an independent frame rather than
# on a column-subset slice (avoids SettingWithCopyWarning).
effect = effect[['District_Name', 'dif_area', 'dif_prod']].copy()
# Keep the first character and lower-case the rest, vectorised over the column.
names = effect['District_Name']
effect['District_Name'] = names.str[:1] + names.str[1:].str.lower()
# Manual spellings for four rows, addressed by row label.
# NOTE(review): presumably chosen to match the NAME_2 values in
# gujarat.geojson; these labels depend on the merge order - confirm after any
# data change.
effect.loc[6,'District_Name'] = 'Dahod'
effect.loc[3,'District_Name'] = 'Banas Kantha'
effect.loc[14,'District_Name'] = 'Panch Mahals'
effect.loc[18,'District_Name'] = 'Sabar Kantha'


# Production difference per district, keyed by district name.
effect1 = effect.set_index('District_Name')['dif_prod']
inp = effect1.to_numpy()
effect1 = pd.Series(inp, index = effect1.index)

# Map centred on a fixed coordinate with a +/- 2 degree viewport.
map_lat, map_lon = 22.2587, 71.1924
m = folium.Map(location=[map_lat, map_lon], tiles='cartodbpositron')
folium.FitBounds(
    [(map_lat - 2, map_lon - 2), (map_lat + 2, map_lon + 2)]
).add_to(m)

import branca

# Step colour map built from quantiles of the data, then rescaled to fixed limits.
palette = branca.colormap.linear.RdYlBu_08
colorscale = palette.to_step(
    data=inp, quantiles=np.arange(0, 1, 0.001)
).scale(-150000, 129400.0)
def style_function(feature):
    """Return the style dict for one GeoJSON district feature.

    The feature's ``NAME_2`` property is looked up in ``effect1``.  Features
    with no entry are filled black; all others are filled with
    ``colorscale(value)``.
    """
    popu = effect1.get(feature['properties']['NAME_2'])
    return {
        "fillOpacity": 0.6,
        "weight": 0,
        # "black" is a valid CSS colour name; the former "#black" was neither
        # a valid hex code nor a valid name.
        "fillColor": "black" if popu is None else colorscale(popu),
    }

# Load the district outlines from disk and add them to the map.
# GeoJSON is UTF-8 by specification, so do not rely on the platform's default
# text encoding when reading it.
with open('gujarat.geojson', encoding='utf-8') as f:
    location = json.load(f)
folium.GeoJson(
    location,
    style_function=style_function,
).add_to(m)

<folium.features.GeoJson at 0x7fcbdfbe2040>

m


# Area difference per district, keyed by district name.
effect2 = effect.set_index('District_Name')['dif_area']
m = folium.Map(location=[22.2587, 71.1924],tiles='cartodbpositron')#'CartoDB positron'
inp = effect2.to_numpy()
# The style callback reads the name `effect1`, so that name is rebound to the
# area data.  The index is taken from effect2 itself, so this cell no longer
# depends on an `effect1` left behind by the production-map cell.
effect1 = pd.Series(inp, index = effect2.index)
folium.FitBounds([(22.2587-2,71.1924-2), (22.2587+2,71.1924+2)]).add_to(m)
import branca

# Step colour map built from quantiles of the data, then rescaled to fixed limits.
colorscale = branca.colormap.linear.RdYlBu_08.to_step(data=inp, quantiles=np.arange(0,1,0.001)).scale(-110200,119400)
def style_function(feature):
    """Return the style dict for one GeoJSON district feature.

    The feature's ``NAME_2`` property is looked up in ``effect1``.  Features
    with no entry are filled black; all others are filled with
    ``colorscale(value)``.
    """
    popu = effect1.get(feature['properties']['NAME_2'])
    return {
        "fillOpacity": 0.6,
        "weight": 0,
        # "black" is a valid CSS colour name; the former "#black" was neither
        # a valid hex code nor a valid name.
        "fillColor": "black" if popu is None else colorscale(popu),
    }

# Load the district outlines from disk and add them to the map.
# GeoJSON is UTF-8 by specification, so do not rely on the platform's default
# text encoding when reading it.
with open('gujarat.geojson', encoding='utf-8') as f:
    location = json.load(f)
folium.GeoJson(
    location,
    style_function=style_function,
).add_to(m)

<folium.features.GeoJson at 0x7fcbdf821d90>

m

cs


# Long-format rate-of-change table: one row per (Month, crop series).
# The first data row is skipped.
roc_columns = ['Month', 'Cotton ROC', 'Soybeans ROC']
cs1 = cs.iloc[1:][roc_columns].melt(
    id_vars='Month', var_name='Crops', value_name='ROC'
)
cs1


# Selection that follows the mouse and snaps to the nearest Month value.
nearest = alt.selection(
    type='single',
    nearest=True,
    on='mouseover',
    fields=['Month'],
    empty='none',
)

# Layer 1: one line per crop series; line size is 12 where the selection
# matches and 1.6 elsewhere.
line = alt.Chart(cs1).mark_line().encode(
    x=alt.X('Month:T'),
    y=alt.Y('ROC:Q', scale=alt.Scale(zero=False)),
    color='Crops:N',
    size=alt.condition(~nearest, alt.value(1.6), alt.value(12)),
)

# Layer 2: invisible points spanning the chart; they carry the selection and
# so determine the x-value under the cursor.
selectors = (
    alt.Chart(cs1)
    .mark_point()
    .encode(x='Month:T', opacity=alt.value(0))
    .add_selection(nearest)
)

# Layer 3: points on the lines, shown only at the selected month.
points = line.mark_point().encode(
    opacity=alt.condition(nearest, alt.value(1), alt.value(0))
)

# Layer 4: ROC labels next to the selected points.
text = line.mark_text(align='left', dx=5, dy=-5).encode(
    text=alt.condition(nearest, 'ROC:Q', alt.value(' '))
)

# Layer 5: vertical rule at the selected month.
rules = (
    alt.Chart(cs1)
    .mark_rule(color='gray')
    .encode(x='Month:T')
    .transform_filter(nearest)
)

# Combine the five layers into one chart.
alt.layer(line, selectors, points, rules, text).properties(width=630, height=300)

	State Name	District Population
0	Andaman and Nicobar	4.309542e+05
1	Andhra Pradesh	5.649770e+07
2	Arunachal Pradesh	1.592486e+06
3	Assam	3.561591e+07
4	Bihar	1.184479e+08

	State_Name	District_Name	Crop_Year	Season	Crop	Area	Production
57515	Gujarat	AHMADABAD	2012	Kharif	Cotton(lint)	176800.0	302100.0
57864	Gujarat	AMRELI	2012	Kharif	Cotton(lint)	289300.0	242700.0
58103	Gujarat	ANAND	2012	Kharif	Cotton(lint)	7600.0	20800.0
58500	Gujarat	BANAS KANTHA	2012	Kharif	Cotton(lint)	45600.0	160700.0
58907	Gujarat	BHARUCH	2012	Kharif	Cotton(lint)	118900.0	271700.0

	Month	Cotton Price (Indian Rupee per Kilogram)	Soybeans Price (Indian Rupee per Metric Ton)	Cotton ROC	Soybeans ROC	Cotton / Soybeans Price Ratio
0	2007-04-01	53.11	13477.80	-	-	0.0039
1	2007-05-01	49.77	13624.53	-0.063	0.0109	0.0037
2	2007-06-01	54.62	14715.08	0.0976	0.08	0.0037
3	2007-07-01	60.62	15176.34	0.1099	0.0313	0.0040
4	2007-08-01	60.00	15714.78	-0.0103	0.0355	0.0038
...	...	...	...	...	...	...
176	2021-12-01	200.18	41859.34	-0.0367	0.0199	0.0048
177	2022-01-01	216.64	45132.08	0.0823	0.0782	0.0048
178	2022-02-01	228.85	49643.56	0.0563	0.1	0.0046
179	2022-03-01	237.12	54941.77	0.0361	0.1067	0.0043
180	2022-04-01	260.54	54911.05	0.0988	-0.0006	0.0047

	Month	Crops	ROC
0	2007-05-01	Cotton ROC	-0.063
1	2007-06-01	Cotton ROC	0.0976
2	2007-07-01	Cotton ROC	0.1099
3	2007-08-01	Cotton ROC	-0.0103
4	2007-09-01	Cotton ROC	0.0085
...	...	...	...
355	2021-12-01	Soybeans ROC	0.0199
356	2022-01-01	Soybeans ROC	0.0782
357	2022-02-01	Soybeans ROC	0.1
358	2022-03-01	Soybeans ROC	0.1067
359	2022-04-01	Soybeans ROC	-0.0006

Indian Crop Production Analysis and Relevant Climate Effects¶

Presenters:¶

Table of Contents

Introduction¶

India economy sector distribution¶

Population dependency¶

Indian agriculture is particularly vulnerable due to:¶

Long coastline and relatively low altitude:¶

Indian top ten crop production since 2010¶

We can see the connection between temperature and CO2 emission¶

What happened to the cotton production in Gujarat and Maharashtra?¶

We can see that production in most districts of Gujarat dropped sharply within two years¶

Blue: Decrease in production¶

Yellow or light blue: little change in either direction¶

Red: Increase in production¶

Production in the eastern part tends to grow slightly, while production in the other areas drops a lot¶

Blue: Decrease in Area¶

Yellow or light blue: little change in either direction¶

Red: Increase in Area¶

The decline resulted from multiple causes:¶

These are the price and rate-of-change data for soybeans and cotton since 2007¶

We can see that around the same period from 2011 to 2012, the price of cotton slumped by more than 40%. That's partly why the production also decreased¶

Some suggestions¶

If you are interested¶

all reported a huge decline in cotton production in India¶

References¶