/********************************************************************************
********************************************************************************
*A. General information
********************************************************************************
********************************************************************************/

*Open the directory
********************************************************************************
*NOTE(review): absolute, machine-specific path - adjust it before running this
*do-file on another computer.
cd "C:\Users\Alexandros\Desktop\Courses\MSc_Bioeconomics\2025_MSc_Bio\1.Data_Management"

*Open the log file
********************************************************************************
*"capture log close" closes a log left open by an earlier, interrupted run
*(and does nothing if no log is open). The "replace" option overwrites an
*existing Lecture1.smcl, so the do-file can be run more than once.
capture log close
log using "Lecture1.smcl", replace

*Open the dataset
********************************************************************************
*"clear" discards whatever data are currently in memory before loading.
use "Greece_2024.dta", clear

*Variables' exposition - "describe"
********************************************************************************
describe

*Define the dataset
********************************************************************************
*Declare the data as a time series with "year" as the time variable; the
*time-series operators used further below depend on this declaration.
tsset year


********************************************************************************
********************************************************************************
*B. Variable Management
********************************************************************************
********************************************************************************

********************************************************************************
**"generate"
********************************************************************************
*Crisis dummy: 1 from 2009 onwards, 0 for every year before 2009.
*(Stata treats a missing value as larger than any number, so an observation
*with a missing year would be coded 1.)
generate crisis    = (year >= 2009)
*Difference between yield_rate_10y and inflation.
generate real_rate = yield_rate_10y - inflation
*Square of cgd_GDP.
generate cgd_sq    = cgd_GDP^2
*Natural logarithm of population.
generate lnpop     = ln(population)



********************************************************************************
**"rename"
********************************************************************************
*Give the crisis dummy a more descriptive name.
rename crisis post_crisis

********************************************************************************
**"relabel"
********************************************************************************
*Attach a descriptive label to the debt variable.
label var cgd_GDP "Central Government Debt (%GDP) known as public debt"

********************************************************************************
**"order"
********************************************************************************
*Move the listed variables to the front of the dataset, in the order given.
order year cgd_GDP yield_rate_10y real_rate, first

********************************************************************************
**"sort"
********************************************************************************
*First sort the observations by debt (ascending) ...
sort cgd_GDP
*... then put them back in chronological order.
sort year

********************************************************************************
**"keep" or "drop"
********************************************************************************
*Remove the squared-debt variable from the dataset.
drop cgd_sq
*Alternative: "keep" followed by the list of all the variables to retain.

********************************************************************************
**"destring" or "tostring" or "encode"
********************************************************************************
**"tostring" and "destring"
*Round trip for demonstration: make a string copy of the numeric dummy,
*convert that copy back to numeric, then discard it.
tostring post_crisis, generate(dum_crisis)
destring dum_crisis, replace
drop     dum_crisis
**"encode"
*String flag: "yes" when inflation is above 4, "no" otherwise.
*Stata treats a missing value as larger than any number, so missing inflation
*is excluded explicitly from "yes" (it stays empty and is encoded as missing).
*The "no" branch uses <= so that inflation exactly equal to 4 is classified.
generate high_infl = "yes" if inflation > 4 & !missing(inflation)
replace  high_infl = "no"  if inflation <= 4
*"encode" turns the string categories into a numeric variable with value labels.
encode   high_infl, generate(high_inflation)

*The dataset is kept in memory and the log is kept open at this point: the
*commands below use the variables loaded and created above (cgd_GDP,
*high_inflation) and the tsset declaration, and the log is closed once, at the
*end of the do-file.
********************************************************************************
**************************04/10/2025**********************
********************************************************************************


********************************************************************************
**Metrics
********************************************************************************
**"mean"
*egen functions return one statistic over all observations, repeated in
*every row.
egen m_pdebt   = mean(cgd_GDP)
egen med_pdebt = median(cgd_GDP)
egen min_pdebt = min(cgd_GDP)
egen max_pdebt = max(cgd_GDP)
*gen with sum() gives the running (cumulative) sum in the current sort order.
gen  sum_pdebt = sum(cgd_GDP)
*egen with total() gives the overall total, repeated in every row.
egen tot_pdebt = total(cgd_GDP)


********************************************************************************
**Group identifiers
********************************************************************************
*_n is the number of the current observation, _N the total number of
*observations.
generate k1 = _n
generate k2 = _N

********************************************************************************
**Operators
********************************************************************************
*Time-series operators; they rely on the tsset declaration of the data.
*L. = value one period earlier (lag)
generate lpdebt  = L.cgd_GDP
*F. = value one period later (lead)
generate fpdebt  = F.cgd_GDP
*D. = change with respect to the previous period (first difference)
generate dpdebt  = D.cgd_GDP
*L4. = value four periods earlier
generate l4pdebt = L4.cgd_GDP
*Bring year, debt and every variable whose name ends in "pdebt" to the front.
order year cgd_GDP *pdebt

********************************************************************************
**Dummies and "if"
********************************************************************************
*Frequency table of year; "generate(yr)" also creates one dummy per distinct
*year (yr1, yr2, ...).
tabulate year, generate(yr)
*Pre-crisis dummy: start from 0 everywhere, then switch to 1 before 2007.
generate pre_crisis = 0
replace  pre_crisis = 1 if year < 2007

********************************************************************************
**Symbols
********************************************************************************
*Three-way code built with "==", "|" (or) and "&" (and):
*  1 = year is 2000, 2002, 2004 or 2006
*  0 = any other year before 2007
*  2 = year after 2006
generate pre_crisis_even_yr = 1 if year == 2000 | year == 2002 | year == 2004 | year == 2006
replace  pre_crisis_even_yr = 0 if missing(pre_crisis_even_yr) & year < 2007
replace  pre_crisis_even_yr = 2 if year > 2006

********************************************************************************
**Descriptive statistics
********************************************************************************
**"sum"
*Basic summary statistics, then the extended version with the "detail" option.
summarize cgd_GDP yield_rate_10y inflation uneml_rate GDP_pc_2010
summarize cgd_GDP yield_rate_10y inflation uneml_rate GDP_pc_2010, detail

**"tabulate"
*Frequency table of the encoded inflation flag.
tabulate high_inflation

**"codebook"
*Detailed description of a single variable.
codebook cgd_GDP

**"list"
*First ten observations of three selected variables.
list year cgd_GDP yield_rate_10y in 1/10
*First fifteen observations of year and of every variable starting with "m".
list year m* in 1/15
*Year and every variable ending in "pdebt", for 2012 only.
list year *pdebt if year == 2012

*Remove the data from memory before moving on.
clear


********************************************************************************
********************************************************************************
*C. Dataset Management
********************************************************************************
********************************************************************************
*Open the directory
********************************************************************************
*NOTE(review): absolute, machine-specific path - adjust it before running this
*do-file on another computer.
cd "C:\Users\Alexandros\Desktop\Courses\MSc_Bioeconomics\2025_MSc_Bio\1.Data_Management"

********************************************************************************
*Import files
********************************************************************************
*Each sheet of the workbook is imported (first row = variable names) and
*saved as a separate Stata dataset. "clear" on import and "replace" on save
*make the block re-runnable: without them a second run stops because data are
*in memory or the .dta file already exists.
*NOTE(review): the workbook is opened here with the extension .xls, whereas
*the Reshape section reads "Greece_2000_24.xlsx" - confirm which file name is
*correct.
import excel "Greece_2000_24.xls", sheet("economic_2019") firstrow clear
save         "Economic_2019.dta", replace
clear
**Note: We imported and saved this file with the name "Economic_2019".

********
import excel "Greece_2000_24.xls", sheet("other_2019") firstrow clear
save         "Other_2019.dta", replace
clear
**Note: We imported and saved this file with the name "Other_2019".

********
import excel "Greece_2000_24.xls", sheet("post_2019") firstrow clear
save         "Post_2019.dta", replace
clear
**Note: We imported and saved this file with the name "Post_2019".

********************************************************************************
*Merge
********************************************************************************
*Add the variables of Other_2019 to Economic_2019, matching on year.
*A 1:1 merge is used instead of m:m: the Stata manual advises against m:m
*merges, and 1:1 stops with an error if year does not uniquely identify the
*observations in either file, instead of silently pairing rows by position.
*NOTE(review): assumes one observation per year in both sheets - confirm.
use              "Economic_2019.dta", clear
merge 1:1 year using "Other_2019.dta"
*_merge records the match status of each observation; it is not needed later.
drop _merge
*"replace" lets the do-file be re-run when Pre_2019.dta already exists.
save             "Pre_2019.dta", replace
clear

********************************************************************************
*Append
********************************************************************************
*Stack the observations of Post_2019 below those of Pre_2019.
use          "Pre_2019.dta", clear
append using "Post_2019.dta"
*"replace" is required here: Greece_2024.dta is the file opened at the top of
*this do-file, so it already exists and a plain "save" would stop with an
*error.
save         "Greece_2024.dta", replace
clear

********************************************************************************
*Reshape
********************************************************************************
*Import the sheet "wide_debt", using its first row as variable names.
*NOTE(review): the extension here is .xlsx, whereas the Import section reads
*"Greece_2000_24.xls" - confirm which file name is correct.
import excel "Greece_2000_24.xlsx", sheet("wide_debt") firstrow
*Remove the variable Z (presumably a stray spreadsheet column - confirm).
drop Z
*Convert yr2023 from string to numeric, overwriting it in place (presumably it
*was imported as text - confirm).
destring yr2023, replace
**Wide to long: the yr* columns become rows, giving one observation per
**ccode-year; the numeric suffix of each yr* name is stored in the new
**variable "year" and the values in "yr".
reshape long yr, i(ccode) j(year)
*Give the stacked values a descriptive name.
rename yr public_debt
*Save the long-format panel, overwriting any existing copy.
save  "panel_debt.dta", replace
clear

********************************************************************************
*Collapse
********************************************************************************
*Load the long-format panel.
use "panel_debt.dta", clear
*****
*Replace the data in memory with one observation per year holding the mean of
*public_debt (the mean is the statistic collapse computes by default).
collapse (mean) public_debt, by(year)
**Note: We obtain the variable mean per year.
clear

********************************************************************************
*Close the log file
********************************************************************************
*"capture" keeps the do-file from stopping with an error when no log is open
*at this point (a log that has already been closed cannot be closed again).
capture log close