import requests
import json
import sys
import multiprocessing

def getState():
	"""Read and return the crawl state from crawl-state.json.

	Returns the parsed JSON object (a dict mapping site keys to crawl
	records, judging by how the caller indexes it below).
	Raises FileNotFoundError if the state file does not exist and
	json.JSONDecodeError if it is malformed.
	"""
	# 'with' guarantees the handle is closed even if parsing raises,
	# and json.load streams the file instead of reading it into memory
	# as a string first.
	with open("crawl-state.json", "r") as statefile:
		return json.load(statefile)

# Load the results obtained so far from the crawl state file.
results = getState()

# Filter the full site list (top-1m.csv) down to top-reduced.csv, keeping
# only the lines whose first CSV field is NOT yet recorded as a successful
# crawl.  NOTE(review): both "Sucess" and "Success" are accepted because
# existing state files apparently contain the misspelled variant — do not
# "fix" the typo without migrating the state data.
# 'with' closes both handles even if an exception fires mid-loop.
with open("top-1m.csv", "r") as full_list, open("top-reduced.csv", "w") as reduced_list:
	for line in full_list:
		# First CSV field is the lookup key into the crawl state
		# (presumably the site rank or domain — matches whatever
		# key getState()'s records use).
		key = line.split(",")[0]
		# Keep the site unless it was already crawled successfully.
		# Record layout: index 1 of each state entry is the status string.
		if key not in results or results[key][1] not in ["Sucess", "Success"]:
			reduced_list.write(line)
