// a. Import the datasets, create the DataFrames and print their schemas.

// Directory that holds the three World Cup CSV files.
// NOTE(review): the user-name segment of this path was reconstructed from a
// garbled copy of the script -- confirm it against the real location.
val dataDir = "/Users/bhargavichennupati/Downloads/LAB"

// Reads one CSV file from dataDir. The first row is treated as the header, and
// the DROPMALFORMED mode makes Spark skip records it cannot parse instead of
// failing the whole read.
def loadCsv(fileName: String) =
  spark.read
    .format("csv")
    .option("header", "true")
    .option("mode", "DROPMALFORMED")
    .load(s"$dataDir/$fileName")

val df1 = loadCsv("WorldCups.csv")
val df2 = loadCsv("WorldCupPlayers.csv")
val df3 = loadCsv("WorldCupMatches.csv")

// Printing the schemas.
df1.printSchema()
df2.printSchema()
df3.printSchema()

// b. Perform 10 intuitive questions on the dataset.
// For this problem we use Spark SQL on DataFrames.

// First of all, register the three DataFrames as temp views so they can be
// referenced by name from SQL.
df1.createOrReplaceTempView("WC")
df2.createOrReplaceTempView("Players")
df3.createOrReplaceTempView("Matches")

// Winner, host country and year of every World Cup (WC view), ordered by country.
val Q = spark.sql("select Winner, Country, Year from WC Order By Country")

// Qualified teams and matches played, by year, for the World Cups whose
// Country column is 'Brazil' (WC view).
val Q1 = spark.sql(
  "select QualifiedTeams, MatchesPlayed, Year from WC WHERE Country = 'Brazil' Order By Year"
)
Q1.show()

// Number of World Cup matches per city (Matches view). The result is not
// sorted, so the busiest host cities have to be read off the counts.
val Q2 = spark.sql("select Count(City),City from Matches Group By City")
Q2.show()