# Big Data and Python Programming Code


"""DATA SCOPE:There are 3 data files data_10.txt, data_11.txt, data_12.txt of class 10th, 11th, 12th respectively, which contains students data in
Given format (separated by pipe)

Id|Name|city|Grade|Age
s001|Rohan|mumbai|A|16

There is another file (scholarship.txt) which contains scholarship info (not all students get a scholarship):

Id|Family_Income|No_Of_Members
s013|2000|10

Task: find all scholarship-receiving students who got either a C or a D grade, who are from 'Mumbai', and whose Family_Income < 5000."""

import sys

def openFile(filename='',mode='r'):
    """Open ``filename`` in ``mode`` and return the resulting file object.

    Parameters:
        filename: path of the file to open; must be non-empty.
        mode: mode string passed straight to the built-in ``open``.

    Returns:
        The open file object.

    Exits:
        Prints a message and terminates via ``sys.exit()`` (raising
        ``SystemExit``) when no filename is given or the file cannot be
        opened.
    """
    if not filename:
        print("\n Please specify the filename")
        sys.exit()

    try:
        # open() never returns a falsy handle -- a failure is signalled by an
        # exception, so it has to be caught rather than tested with ``if``.
        return open(filename, mode)
    except IOError:
        print("\nFile was not opened\n")
        sys.exit()
       
def fileClose(fileHandler):
    """Close ``fileHandler`` by calling its ``close()`` method."""
    fileHandler.close()
   
def processFile(fileHandler):
    """Read all remaining lines from ``fileHandler``.

    Parameters:
        fileHandler: an open, readable file-like object.

    Returns:
        A list with one string per line (line endings included). For an
        empty file a notice is printed and an empty list is returned, so
        callers can always iterate over the result.
    """
    perLineInformation = fileHandler.readlines()

    if not perLineInformation:
        print("\n File is Empty")

    # Always hand back the list (possibly empty) instead of an implicit None.
    return perLineInformation
       
def _lowIncomeIds(scholarshipLines, incomeLimit=5000):
    """Return the set of ids whose Family_Income is numerically below ``incomeLimit``."""
    eligibleIds = set()
    for line in scholarshipLines:
        fields = line.strip().split('|')
        if len(fields) < 2:
            continue                      # blank or truncated row
        try:
            income = float(fields[1])
        except ValueError:
            continue                      # header row or non-numeric income
        if income < incomeLimit:
            eligibleIds.add(fields[0].strip())
    return eligibleIds


def _isSelected(studentLine, eligibleIds):
    """Return True when the student row is from Mumbai, has grade C/D and an eligible id."""
    fields = [field.strip() for field in studentLine.split('|')]
    if len(fields) < 4:
        return False                      # blank or truncated row
    return (fields[0] in eligibleIds
            and fields[2].lower() == 'mumbai'
            and fields[3].upper() in ('C', 'D'))


def main(filename1,filename2='scholarship.txt'):
    """Append the qualifying students of one class file to ``result.txt``.

    A student qualifies when the id appears in the scholarship file with a
    Family_Income below 5000, the city is Mumbai (compared case-insensitively)
    and the grade is C or D.

    Parameters:
        filename1: pipe-separated student file (Id|Name|city|Grade|Age).
        filename2: pipe-separated scholarship file
            (Id|Family_Income|No_Of_Members).

    Returns:
        None. Matching rows are appended to ``result.txt`` in the current
        directory; the file is not touched when nothing matches.

    Raises:
        IOError/OSError: if either input file cannot be opened.
    """
    # Context managers release the handles even if parsing fails part-way.
    with open(filename1) as studentFile, open(filename2) as scholarshipFile:
        eligibleIds = _lowIncomeIds(scholarshipFile)
        selected = [line for line in studentFile
                    if _isSelected(line, eligibleIds)]

    if not selected:
        return

    # Open the output once; make sure every record ends with a newline so a
    # final input line without one does not fuse with the next appended record.
    with open('result.txt', 'a') as outputFile:
        outputFile.writelines(
            line if line.endswith('\n') else line + '\n' for line in selected)
# Process the student file of every class in turn; each call uses the default
# scholarship file.
for classFile in ('data_10.txt', 'data_11.txt', 'data_12.txt'):
    main(classFile)