Python module (submodule repository) which provides content (video streams) from various online stream sources to the corresponding Enigma2, Kodi, and Plex plugins

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. #
  4. r""" A JSON data encoder and decoder.
  5. This Python module implements the JSON (http://json.org/) data
  6. encoding format; a subset of ECMAScript (aka JavaScript) for encoding
  7. primitive data types (numbers, strings, booleans, lists, and
  8. associative arrays) in a language-neutral simple text-based syntax.
  9. It can encode or decode between JSON formatted strings and native
  10. Python data types. Normally you would use the encode() and decode()
  11. functions defined by this module, but if you want more control over
  12. the processing you can use the JSON class.
  13. This implementation tries to be as completely conforming to all
  14. intricacies of the standards as possible. It can operate in strict
  15. mode (which only allows JSON-compliant syntax) or a non-strict mode
  16. (which allows much more of the whole ECMAScript permitted syntax).
  17. This includes complete support for Unicode strings (including
  18. surrogate-pairs for non-BMP characters), and all number formats
  19. including negative zero and IEEE 754 non-numbers such as NaN or
  20. Infinity.
  21. The JSON/ECMAScript to Python type mappings are:
  22. ---JSON--- ---Python---
  23. null None
  24. undefined undefined (note 1)
  25. Boolean (true,false) bool (True or False)
  26. Integer int or long (note 2)
  27. Float float
  28. String str or unicode ( "..." or u"..." )
  29. Array [a, ...] list ( [...] )
  30. Object {a:b, ...} dict ( {...} )
  31. -- Note 1. an 'undefined' object is declared in this module which
  32. represents the native Python value for this type when in
  33. non-strict mode.
  34. -- Note 2. some ECMAScript integers may be up-converted to Python
  35. floats, such as 1e+40. Also integer -0 is converted to
  36. float -0, so as to preserve the sign (which ECMAScript requires).
  37. -- Note 3. numbers requiring more significant digits than can be
  38. represented by the Python float type will be converted into a
  39. Python Decimal type, from the standard 'decimal' module.
  40. In addition, when operating in non-strict mode, several IEEE 754
  41. non-numbers are also handled, and are mapped to specific Python
  42. objects declared in this module:
  43. NaN (not a number) nan (float('nan'))
  44. Infinity, +Infinity inf (float('inf'))
  45. -Infinity neginf (float('-inf'))
  46. When encoding Python objects into JSON, you may use types other than
  47. native lists or dictionaries, as long as they support the minimal
  48. interfaces required of all sequences or mappings. This means you can
  49. use generators and iterators, tuples, UserDict subclasses, etc.
  50. To make it easier to produce JSON encoded representations of user
  51. defined classes, if the object has a method named json_equivalent(),
  52. then it will call that method and attempt to encode the object
  53. returned from it instead. It will do this recursively as needed and
  54. before any attempt to encode the object using its default
  55. strategies. Note that any json_equivalent() method should return
  56. "equivalent" Python objects to be encoded, not an already-encoded
  57. JSON-formatted string. There is no such aid provided to decode
  58. JSON back into user-defined classes as that would dramatically
  59. complicate the interface.
  60. When decoding strings with this module it may operate in either
  61. strict or non-strict mode. The strict mode only allows syntax which
  62. is conforming to RFC 7159 (JSON), while the non-strict allows much
  63. more of the permissible ECMAScript syntax.
  64. The following are permitted when processing in NON-STRICT mode:
  65. * Unicode format control characters are allowed anywhere in the input.
  66. * All Unicode line terminator characters are recognized.
  67. * All Unicode white space characters are recognized.
  68. * The 'undefined' keyword is recognized.
  69. * Hexadecimal number literals are recognized (e.g., 0xA6, 0177).
  70. * String literals may use either single or double quote marks.
  71. * Strings may contain \x (hexadecimal) escape sequences, as well as the
  72. \v and \0 escape sequences.
  73. * Lists may have omitted (elided) elements, e.g., [,,,,,], with
  74. missing elements interpreted as 'undefined' values.
  75. * Object properties (dictionary keys) can be of any of the
  76. types: string literals, numbers, or identifiers (the latter of
  77. which are treated as if they are string literals)---as permitted
  78. by ECMAScript. JSON only permits string literals as keys.
  79. Concerning non-strict and non-ECMAScript allowances:
  80. * Octal numbers: If you allow the 'octal_numbers' behavior (which
  81. is never enabled by default), then you can use octal integers
  82. and octal character escape sequences (per the ECMAScript
  83. standard Annex B.1.2). This behavior is allowed, if enabled,
  84. because it was valid JavaScript at one time.
  85. * Multi-line string literals: Strings which are more than one
  86. line long (contain embedded raw newline characters) are never
  87. permitted. This is neither valid JSON nor ECMAScript. Some other
  88. JSON implementations may allow this, but this module considers
  89. that behavior to be a mistake.
  90. References:
  91. * JSON (JavaScript Object Notation)
  92. <http://json.org/>
  93. * RFC 7159. The application/json Media Type for JavaScript Object Notation (JSON)
  94. <http://www.ietf.org/rfc/rfc7159.txt>
  95. * ECMA-262 3rd edition (1999)
  96. <http://www.ecma-international.org/publications/files/ecma-st/ECMA-262.pdf>
  97. * IEEE 754-1985: Standard for Binary Floating-Point Arithmetic.
  98. <http://www.cs.berkeley.edu/~ejr/Projects/ieee754/>
  99. """
  100. __author__ = "Deron Meranda <http://deron.meranda.us/>"
  101. __homepage__ = "http://deron.meranda.us/python/demjson/"
  102. __date__ = "2015-12-22"
  103. __version__ = "2.2.4"
  104. __version_info__ = ( 2, 2, 4 ) # Will be converted into a namedtuple below
  105. __credits__ = """Copyright (c) 2006-2015 Deron E. Meranda <http://deron.meranda.us/>
  106. Licensed under GNU LGPL (GNU Lesser General Public License) version 3.0
  107. or later. See LICENSE.txt included with this software.
  108. This program is free software: you can redistribute it and/or modify
  109. it under the terms of the GNU Lesser General Public License as
  110. published by the Free Software Foundation, either version 3 of the
  111. License, or (at your option) any later version.
  112. This program is distributed in the hope that it will be useful,
  113. but WITHOUT ANY WARRANTY; without even the implied warranty of
  114. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  115. GNU General Public License for more details.
  116. You should have received a copy of the GNU Lesser General Public License
  117. along with this program. If not, see <http://www.gnu.org/licenses/>
  118. or <http://www.fsf.org/licensing/>.
  119. """
  120. # ----------------------------------------------------------------------
  # Set demjson version
  #
  # Convert the plain __version_info__ tuple into a namedtuple so callers
  # can read .major / .minor / .micro.  collections.namedtuple first
  # appeared in Python 2.6, so a failed import means the interpreter is
  # too old to run this module.
  try:
      from collections import namedtuple as _namedtuple
  except ImportError:
      raise ImportError("demjson %s requires a Python 2.6 or later" % __version__ )
  __version_info__ = _namedtuple('version_info', ['major', 'minor', 'micro'])( *__version_info__ )

  # Public aliases without the dunder decoration.
  version, version_info = __version__, __version_info__
  # Determine Python version
  _py_major, _py_minor = None, None

  def _get_pyver():
      """Record the running interpreter's major and minor version numbers.

      Stores the values in the module globals ``_py_major`` and
      ``_py_minor``.  ``sys.version_info`` supports sequence indexing on
      every Python version (the named ``.major`` / ``.minor`` attributes
      only exist on newer ones), so slicing gives the same result without
      needing an attribute fallback.
      """
      global _py_major, _py_minor
      import sys
      _py_major, _py_minor = sys.version_info[:2]

  _get_pyver()
  139. # ----------------------------------------------------------------------
  140. # Useful global constants
  141. content_type = 'application/json'
  142. file_ext = 'json'
  143. class _dummy_context_manager(object):
  144. """A context manager that does nothing on entry or exit."""
  145. def __enter__(self):
  146. pass
  147. def __exit__(self, exc_type, exc_val, exc_tb):
  148. return False
  149. _dummy_context_manager = _dummy_context_manager()
  150. # ----------------------------------------------------------------------
  151. # Decimal and float types.
  152. #
# If a JSON number cannot be stored in a Python float without losing
# precision, and this Python has the decimal type, then we will try to
# use decimal instead of float.  To make this determination we need to
# know the limits of the float type, but Python doesn't have an easy
# way to tell what the largest floating-point number it supports is.
# So, we determine the precision and scale of the float type by testing it.
  159. try:
  160. # decimal module was introduced in Python 2.4
  161. import decimal
  162. except ImportError:
  163. decimal = None
def determine_float_limits( number_type=float ):
    """Determines the precision and range of the given float type.

    The limits are found empirically: numbers are built from decimal
    strings of increasing length / exponent and compared, rather than
    being read from any platform constant.

    The passed in 'number_type' argument should refer to the type of
    floating-point number.  It should either be the built-in 'float',
    or decimal context or constructor; i.e., one of:

        # 1. FLOAT TYPE
        determine_float_limits( float )

        # 2. DEFAULT DECIMAL CONTEXT
        determine_float_limits( decimal.Decimal )

        # 3. CUSTOM DECIMAL CONTEXT
        ctx = decimal.Context( prec=75 )
        determine_float_limits( ctx )

    Returns a named tuple with components:

         ( significant_digits,
           max_exponent,
           min_exponent )

    Where:

        * significant_digits -- maximum number of *decimal* digits
             that can be represented without any loss of precision.
             This is conservative, so if there are 16 1/2 digits, it
             will return 16, not 17.

        * max_exponent -- The maximum exponent (power of 10) that can
             be represented before an overflow (or rounding to
             infinity) occurs.

        * min_exponent -- The minimum exponent (negative power of 10)
             that can be represented before either an underflow
             (rounding to zero) or a subnormal result (loss of
             precision) occurs.  Note this is conservative, as
             subnormal numbers are excluded.

    Raises TypeError if 'number_type' is neither float, decimal.Decimal,
    nor a decimal.Context instance.

    """
    # Exceptions that are taken to mean "this exponent is out of range".
    if decimal:
        numeric_exceptions = (ValueError,decimal.Overflow,decimal.Underflow)
    else:
        numeric_exceptions = (ValueError,)

    # The Decimal constructor itself stands for the default context.
    if decimal and number_type == decimal.Decimal:
        number_type = decimal.DefaultContext

    if decimal and isinstance(number_type, decimal.Context):
        # Passed a decimal Context, extract the bound creator function.
        create_num = number_type.create_decimal
        decimal_ctx = decimal.localcontext(number_type)
        is_zero_or_subnormal = lambda n: n.is_zero() or n.is_subnormal()
    elif number_type == float:
        create_num = number_type
        # Floats need no context; use the do-nothing context manager.
        decimal_ctx = _dummy_context_manager
        is_zero_or_subnormal = lambda n: n==0
    else:
        raise TypeError("Expected a float type, e.g., float or decimal context")

    with decimal_ctx:
        # Every probe value has this zero added to it before being compared.
        zero = create_num('0.0')

        # Find significant digits by comparing floats of increasing
        # number of digits, differing in the last digit only, until
        # they numerically compare as being equal.
        sigdigits = None
        n = 0
        while True:
            n = n + 1
            pfx = '0.' + '1'*n
            a = create_num( pfx + '0')
            for sfx in '123456789':  # Check all possible last digits to
                                     # avoid any partial-decimal.
                b = create_num( pfx + sfx )
                if (a+zero) == (b+zero):
                    sigdigits = n
                    break
            if sigdigits:
                break

        # Find exponent limits.  First find order of magnitude and
        # then use a binary search to find the exact exponent.
        # 'base' carries sigdigits digits and 'base0' one digit fewer; if
        # the two compare equal at some exponent, a digit has been lost.
        base = '1.' + '1'*(sigdigits-1)
        base0 = '1.' + '1'*(sigdigits-2)
        minexp, maxexp = None, None

        # The '+' pass finds max_exponent, the '-' pass finds min_exponent.
        for expsign in ('+','-'):
            minv = 0; maxv = 10
            # First find order of magnitude of exponent limit
            while True:
                try:
                    s = base + 'e' + expsign + str(maxv)
                    s0 = base0 + 'e' + expsign + str(maxv)
                    f = create_num( s ) + zero
                    f0 = create_num( s0 ) + zero
                except numeric_exceptions:
                    f = None
                # Stop growing once maxv is unusable: an exception was
                # raised, the value is zero/subnormal, its text does not
                # start with a digit, or a digit of precision was lost.
                if not f or not str(f)[0].isdigit() or is_zero_or_subnormal(f) or f==f0:
                    break
                else:
                    minv = maxv
                    maxv = maxv * 10

            # Now do a binary search to find exact limit
            # (minv is a usable exponent, maxv is an unusable one)
            while True:
                if minv+1 == maxv:
                    # Adjacent bounds: minv is the largest usable exponent.
                    if expsign=='+':
                        maxexp = minv
                    else:
                        minexp = minv
                    break
                elif maxv < minv:
                    # Bounds crossed: no limit could be established.
                    if expsign=='+':
                        maxexp = None
                    else:
                        minexp = None
                    break
                m = (minv + maxv) // 2
                try:
                    s = base + 'e' + expsign + str(m)
                    s0 = base0 + 'e' + expsign + str(m)
                    f = create_num( s ) + zero
                    f0 = create_num( s0 ) + zero
                except numeric_exceptions:
                    f = None
                else:
                    # Same usability tests as in the magnitude search above.
                    if not f or not str(f)[0].isdigit():
                        f = None
                    elif is_zero_or_subnormal(f) or f==f0:
                        f = None
                if not f:
                    # infinite
                    maxv = m
                else:
                    minv = m

    # minexp was searched as a positive magnitude; report it negated.
    return _namedtuple('float_limits', ['significant_digits', 'max_exponent', 'min_exponent'])( sigdigits, maxexp, -minexp )
  284. float_sigdigits, float_maxexp, float_minexp = determine_float_limits( float )
  285. # For backwards compatibility with older demjson versions:
  286. def determine_float_precision():
  287. v = determine_float_limits( float )
  288. return ( v.significant_digits, v.max_exponent )
  289. # ----------------------------------------------------------------------
  290. # The undefined value.
  291. #
  292. # ECMAScript has an undefined value (similar to yet distinct from null).
# Neither Python nor strict JSON supports undefined, but to allow
  294. # JavaScript behavior we must simulate it.
  295. class _undefined_class(object):
  296. """Represents the ECMAScript 'undefined' value."""
  297. __slots__ = []
  298. def __repr__(self):
  299. return self.__module__ + '.undefined'
  300. def __str__(self):
  301. return 'undefined'
  302. def __nonzero__(self):
  303. return False
  304. undefined = _undefined_class()
  305. syntax_error = _undefined_class() # same as undefined, but has separate identity
  306. del _undefined_class
  307. # ----------------------------------------------------------------------
  308. # Non-Numbers: NaN, Infinity, -Infinity
  309. #
  310. # ECMAScript has official support for non-number floats, although
  311. # strict JSON does not. Python doesn't either. So to support the
  312. # full JavaScript behavior we must try to add them into Python, which
  313. # is unfortunately a bit of black magic. If our python implementation
  314. # happens to be built on top of IEEE 754 we can probably trick python
  315. # into using real floats. Otherwise we must simulate it with classes.
  316. def _nonnumber_float_constants():
  317. """Try to return the Nan, Infinity, and -Infinity float values.
  318. This is necessarily complex because there is no standard
  319. platform-independent way to do this in Python as the language
  320. (opposed to some implementation of it) doesn't discuss
  321. non-numbers. We try various strategies from the best to the
  322. worst.
  323. If this Python interpreter uses the IEEE 754 floating point
  324. standard then the returned values will probably be real instances
  325. of the 'float' type. Otherwise a custom class object is returned
  326. which will attempt to simulate the correct behavior as much as
  327. possible.
  328. """
  329. try:
  330. # First, try (mostly portable) float constructor. Works under
  331. # Linux x86 (gcc) and some Unices.
  332. nan = float('nan')
  333. inf = float('inf')
  334. neginf = float('-inf')
  335. except ValueError:
  336. try:
  337. # Try the AIX (PowerPC) float constructors
  338. nan = float('NaNQ')
  339. inf = float('INF')
  340. neginf = float('-INF')
  341. except ValueError:
  342. try:
  343. # Next, try binary unpacking. Should work under
  344. # platforms using IEEE 754 floating point.
  345. import struct, sys
  346. xnan = '7ff8000000000000'.decode('hex') # Quiet NaN
  347. xinf = '7ff0000000000000'.decode('hex')
  348. xcheck = 'bdc145651592979d'.decode('hex') # -3.14159e-11
  349. # Could use float.__getformat__, but it is a new python feature,
  350. # so we use sys.byteorder.
  351. if sys.byteorder == 'big':
  352. nan = struct.unpack('d', xnan)[0]
  353. inf = struct.unpack('d', xinf)[0]
  354. check = struct.unpack('d', xcheck)[0]
  355. else:
  356. nan = struct.unpack('d', xnan[::-1])[0]
  357. inf = struct.unpack('d', xinf[::-1])[0]
  358. check = struct.unpack('d', xcheck[::-1])[0]
  359. neginf = - inf
  360. if check != -3.14159e-11:
  361. raise ValueError('Unpacking raw IEEE 754 floats does not work')
  362. except (ValueError, TypeError):
  363. # Punt, make some fake classes to simulate. These are
  364. # not perfect though. For instance nan * 1.0 == nan,
  365. # as expected, but 1.0 * nan == 0.0, which is wrong.
  366. class nan(float):
  367. """An approximation of the NaN (not a number) floating point number."""
  368. def __repr__(self): return 'nan'
  369. def __str__(self): return 'nan'
  370. def __add__(self,x): return self
  371. def __radd__(self,x): return self
  372. def __sub__(self,x): return self
  373. def __rsub__(self,x): return self
  374. def __mul__(self,x): return self
  375. def __rmul__(self,x): return self
  376. def __div__(self,x): return self
  377. def __rdiv__(self,x): return self
  378. def __divmod__(self,x): return (self,self)
  379. def __rdivmod__(self,x): return (self,self)
  380. def __mod__(self,x): return self
  381. def __rmod__(self,x): return self
  382. def __pow__(self,exp): return self
  383. def __rpow__(self,exp): return self
  384. def __neg__(self): return self
  385. def __pos__(self): return self
  386. def __abs__(self): return self
  387. def __lt__(self,x): return False
  388. def __le__(self,x): return False
  389. def __eq__(self,x): return False
  390. def __neq__(self,x): return True
  391. def __ge__(self,x): return False
  392. def __gt__(self,x): return False
  393. def __complex__(self,*a): raise NotImplementedError('NaN can not be converted to a complex')
  394. if decimal:
  395. nan = decimal.Decimal('NaN')
  396. else:
  397. nan = nan()
  398. class inf(float):
  399. """An approximation of the +Infinity floating point number."""
  400. def __repr__(self): return 'inf'
  401. def __str__(self): return 'inf'
  402. def __add__(self,x): return self
  403. def __radd__(self,x): return self
  404. def __sub__(self,x): return self
  405. def __rsub__(self,x): return self
  406. def __mul__(self,x):
  407. if x is neginf or x < 0:
  408. return neginf
  409. elif x == 0:
  410. return nan
  411. else:
  412. return self
  413. def __rmul__(self,x): return self.__mul__(x)
  414. def __div__(self,x):
  415. if x == 0:
  416. raise ZeroDivisionError('float division')
  417. elif x < 0:
  418. return neginf
  419. else:
  420. return self
  421. def __rdiv__(self,x):
  422. if x is inf or x is neginf or x is nan:
  423. return nan
  424. return 0.0
  425. def __divmod__(self,x):
  426. if x == 0:
  427. raise ZeroDivisionError('float divmod()')
  428. elif x < 0:
  429. return (nan,nan)
  430. else:
  431. return (self,self)
  432. def __rdivmod__(self,x):
  433. if x is inf or x is neginf or x is nan:
  434. return (nan, nan)
  435. return (0.0, x)
  436. def __mod__(self,x):
  437. if x == 0:
  438. raise ZeroDivisionError('float modulo')
  439. else:
  440. return nan
  441. def __rmod__(self,x):
  442. if x is inf or x is neginf or x is nan:
  443. return nan
  444. return x
  445. def __pow__(self, exp):
  446. if exp == 0:
  447. return 1.0
  448. else:
  449. return self
  450. def __rpow__(self, x):
  451. if -1 < x < 1: return 0.0
  452. elif x == 1.0: return 1.0
  453. elif x is nan or x is neginf or x < 0:
  454. return nan
  455. else:
  456. return self
  457. def __neg__(self): return neginf
  458. def __pos__(self): return self
  459. def __abs__(self): return self
  460. def __lt__(self,x): return False
  461. def __le__(self,x):
  462. if x is self:
  463. return True
  464. else:
  465. return False
  466. def __eq__(self,x):
  467. if x is self:
  468. return True
  469. else:
  470. return False
  471. def __neq__(self,x):
  472. if x is self:
  473. return False
  474. else:
  475. return True
  476. def __ge__(self,x): return True
  477. def __gt__(self,x): return True
  478. def __complex__(self,*a): raise NotImplementedError('Infinity can not be converted to a complex')
  479. if decimal:
  480. inf = decimal.Decimal('Infinity')
  481. else:
  482. inf = inf()
  483. class neginf(float):
  484. """An approximation of the -Infinity floating point number."""
  485. def __repr__(self): return '-inf'
  486. def __str__(self): return '-inf'
  487. def __add__(self,x): return self
  488. def __radd__(self,x): return self
  489. def __sub__(self,x): return self
  490. def __rsub__(self,x): return self
  491. def __mul__(self,x):
  492. if x is self or x < 0:
  493. return inf
  494. elif x == 0:
  495. return nan
  496. else:
  497. return self
  498. def __rmul__(self,x): return self.__mul__(self)
  499. def __div__(self,x):
  500. if x == 0:
  501. raise ZeroDivisionError('float division')
  502. elif x < 0:
  503. return inf
  504. else:
  505. return self
  506. def __rdiv__(self,x):
  507. if x is inf or x is neginf or x is nan:
  508. return nan
  509. return -0.0
  510. def __divmod__(self,x):
  511. if x == 0:
  512. raise ZeroDivisionError('float divmod()')
  513. elif x < 0:
  514. return (nan,nan)
  515. else:
  516. return (self,self)
  517. def __rdivmod__(self,x):
  518. if x is inf or x is neginf or x is nan:
  519. return (nan, nan)
  520. return (-0.0, x)
  521. def __mod__(self,x):
  522. if x == 0:
  523. raise ZeroDivisionError('float modulo')
  524. else:
  525. return nan
  526. def __rmod__(self,x):
  527. if x is inf or x is neginf or x is nan:
  528. return nan
  529. return x
  530. def __pow__(self,exp):
  531. if exp == 0:
  532. return 1.0
  533. else:
  534. return self
  535. def __rpow__(self, x):
  536. if x is nan or x is inf or x is inf:
  537. return nan
  538. return 0.0
  539. def __neg__(self): return inf
  540. def __pos__(self): return self
  541. def __abs__(self): return inf
  542. def __lt__(self,x): return True
  543. def __le__(self,x): return True
  544. def __eq__(self,x):
  545. if x is self:
  546. return True
  547. else:
  548. return False
  549. def __neq__(self,x):
  550. if x is self:
  551. return False
  552. else:
  553. return True
  554. def __ge__(self,x):
  555. if x is self:
  556. return True
  557. else:
  558. return False
  559. def __gt__(self,x): return False
  560. def __complex__(self,*a): raise NotImplementedError('-Infinity can not be converted to a complex')
  561. if decimal:
  562. neginf = decimal.Decimal('-Infinity')
  563. else:
  564. neginf = neginf(0)
  565. return nan, inf, neginf
  566. nan, inf, neginf = _nonnumber_float_constants()
  567. del _nonnumber_float_constants
  568. # ----------------------------------------------------------------------
  569. # Integers
  570. class json_int( (1L).__class__ ): # Have to specify base this way to satisfy 2to3
  571. """A subclass of the Python int/long that remembers its format (hex,octal,etc).
  572. Initialize it the same as an int, but also accepts an additional keyword
  573. argument 'number_format' which should be one of the NUMBER_FORMAT_* values.
  574. n = json_int( x[, base, number_format=NUMBER_FORMAT_DECIMAL] )
  575. """
  576. def __new__(cls, *args, **kwargs):
  577. if 'number_format' in kwargs:
  578. number_format = kwargs['number_format']
  579. del kwargs['number_format']
  580. if number_format not in (NUMBER_FORMAT_DECIMAL, NUMBER_FORMAT_HEX, NUMBER_FORMAT_OCTAL, NUMBER_FORMAT_LEGACYOCTAL, NUMBER_FORMAT_BINARY):
  581. raise TypeError("json_int(): Invalid value for number_format argument")
  582. else:
  583. number_format = NUMBER_FORMAT_DECIMAL
  584. obj = super(json_int,cls).__new__(cls,*args,**kwargs)
  585. obj._jsonfmt = number_format
  586. return obj
  587. @property
  588. def number_format(self):
  589. """The original radix format of the number"""
  590. return self._jsonfmt
  591. def json_format(self):
  592. """Returns the integer value formatted as a JSON literal"""
  593. fmt = self._jsonfmt
  594. if fmt == NUMBER_FORMAT_HEX:
  595. return format(self, '#x')
  596. elif fmt == NUMBER_FORMAT_OCTAL:
  597. return format(self, '#o')
  598. elif fmt == NUMBER_FORMAT_BINARY:
  599. return format(self, '#b')
  600. elif fmt == NUMBER_FORMAT_LEGACYOCTAL:
  601. if self==0:
  602. return '0' # For some reason Python's int doesn't do '00'
  603. elif self < 0:
  604. return '-0%o' % (-self)
  605. else:
  606. return '0%o' % self
  607. else:
  608. return str(self)
  609. # ----------------------------------------------------------------------
  610. # String processing helpers
  611. def skipstringsafe( s, start=0, end=None ):
  612. i = start
  613. #if end is None:
  614. # end = len(s)
  615. unsafe = helpers.unsafe_string_chars
  616. while i < end and s[i] not in unsafe:
  617. #c = s[i]
  618. #if c in unsafe_string_chars:
  619. # break
  620. i += 1
  621. return i
  622. def skipstringsafe_slow( s, start=0, end=None ):
  623. i = start
  624. if end is None:
  625. end = len(s)
  626. while i < end:
  627. c = s[i]
  628. if c == '"' or c == "'" or c == '\\' or ord(c) <= 0x1f:
  629. break
  630. i += 1
  631. return i
  632. def extend_list_with_sep( orig_seq, extension_seq, sepchar='' ):
  633. if not sepchar:
  634. orig_seq.extend( extension_seq )
  635. else:
  636. for i, x in enumerate(extension_seq):
  637. if i > 0:
  638. orig_seq.append( sepchar )
  639. orig_seq.append( x )
  640. def extend_and_flatten_list_with_sep( orig_seq, extension_seq, separator='' ):
  641. for i, part in enumerate(extension_seq):
  642. if i > 0 and separator:
  643. orig_seq.append( separator )
  644. orig_seq.extend( part )
  645. # ----------------------------------------------------------------------
  646. # Unicode UTF-32
  647. # ----------------------------------------------------------------------
  648. def _make_raw_bytes( byte_list ):
  649. """Takes a list of byte values (numbers) and returns a bytes (Python 3) or string (Python 2)
  650. """
  651. if _py_major >= 3:
  652. b = bytes( byte_list )
  653. else:
  654. b = ''.join(chr(n) for n in byte_list)
  655. return b
  656. import codecs
  657. class utf32(codecs.CodecInfo):
  658. """Unicode UTF-32 and UCS4 encoding/decoding support.
  659. This is for older Pythons whch did not have UTF-32 codecs.
  660. JSON requires that all JSON implementations must support the
  661. UTF-32 encoding (as well as UTF-8 and UTF-16). But earlier
  662. versions of Python did not provide a UTF-32 codec, so we must
  663. implement UTF-32 ourselves in case we need it.
  664. See http://en.wikipedia.org/wiki/UTF-32
  665. """
  666. BOM_UTF32_BE = _make_raw_bytes([ 0, 0, 0xFE, 0xFF ]) #'\x00\x00\xfe\xff'
  667. BOM_UTF32_LE = _make_raw_bytes([ 0xFF, 0xFE, 0, 0 ]) #'\xff\xfe\x00\x00'
  668. @staticmethod
  669. def lookup( name ):
  670. """A standard Python codec lookup function for UCS4/UTF32.
  671. If if recognizes an encoding name it returns a CodecInfo
  672. structure which contains the various encode and decoder
  673. functions to use.
  674. """
  675. ci = None
  676. name = name.upper()
  677. if name in ('UCS4BE','UCS-4BE','UCS-4-BE','UTF32BE','UTF-32BE','UTF-32-BE'):
  678. ci = codecs.CodecInfo( utf32.utf32be_encode, utf32.utf32be_decode, name='utf-32be')
  679. elif name in ('UCS4LE','UCS-4LE','UCS-4-LE','UTF32LE','UTF-32LE','UTF-32-LE'):
  680. ci = codecs.CodecInfo( utf32.utf32le_encode, utf32.utf32le_decode, name='utf-32le')
  681. elif name in ('UCS4','UCS-4','UTF32','UTF-32'):
  682. ci = codecs.CodecInfo( utf32.encode, utf32.decode, name='utf-32')
  683. return ci
  684. @staticmethod
  685. def encode( obj, errors='strict', endianness=None, include_bom=True ):
  686. """Encodes a Unicode string into a UTF-32 encoded byte string.
  687. Returns a tuple: (bytearray, num_chars)
  688. The errors argument should be one of 'strict', 'ignore', or 'replace'.
  689. The endianness should be one of:
  690. * 'B', '>', or 'big' -- Big endian
  691. * 'L', '<', or 'little' -- Little endien
  692. * None -- Default, from sys.byteorder
  693. If include_bom is true a Byte-Order Mark will be written to
  694. the beginning of the string, otherwise it will be omitted.
  695. """
  696. import sys, struct
  697. # Make a container that can store bytes
  698. if _py_major >= 3:
  699. f = bytearray()
  700. write = f.extend
  701. def tobytes():
  702. return bytes(f)
  703. else:
  704. try:
  705. import cStringIO as sio
  706. except ImportError:
  707. import StringIO as sio
  708. f = sio.StringIO()
  709. write = f.write
  710. tobytes = f.getvalue
  711. if not endianness:
  712. endianness = sys.byteorder
  713. if endianness.upper()[0] in ('B>'):
  714. big_endian = True
  715. elif endianness.upper()[0] in ('L<'):
  716. big_endian = False
  717. else:
  718. raise ValueError("Invalid endianness %r: expected 'big', 'little', or None" % endianness)
  719. pack = struct.pack
  720. packspec = '>L' if big_endian else '<L'
  721. num_chars = 0
  722. if include_bom:
  723. if big_endian:
  724. write( utf32.BOM_UTF32_BE )
  725. else:
  726. write( utf32.BOM_UTF32_LE )
  727. num_chars += 1
  728. for pos, c in enumerate(obj):
  729. n = ord(c)
  730. if 0xD800 <= n <= 0xDFFF: # surrogate codepoints are prohibited by UTF-32
  731. if errors == 'ignore':
  732. pass
  733. elif errors == 'replace':
  734. n = 0xFFFD
  735. else:
  736. raise UnicodeEncodeError('utf32',obj,pos,pos+1,"surrogate code points from U+D800 to U+DFFF are not allowed")
  737. write( pack( packspec, n) )
  738. num_chars += 1
  739. return (tobytes(), num_chars)
  740. @staticmethod
  741. def utf32le_encode( obj, errors='strict', include_bom=False ):
  742. """Encodes a Unicode string into a UTF-32LE (little endian) encoded byte string."""
  743. return utf32.encode( obj, errors=errors, endianness='L', include_bom=include_bom )
  744. @staticmethod
  745. def utf32be_encode( obj, errors='strict', include_bom=False ):
  746. """Encodes a Unicode string into a UTF-32BE (big endian) encoded byte string."""
  747. return utf32.encode( obj, errors=errors, endianness='B', include_bom=include_bom )
  748. @staticmethod
  749. def decode( obj, errors='strict', endianness=None ):
  750. """Decodes a UTF-32 byte string into a Unicode string.
  751. Returns tuple (bytearray, num_bytes)
  752. The errors argument shold be one of 'strict', 'ignore',
  753. 'replace', 'backslashreplace', or 'xmlcharrefreplace'.
  754. The endianness should either be None (for auto-guessing), or a
  755. word that starts with 'B' (big) or 'L' (little).
  756. Will detect a Byte-Order Mark. If a BOM is found and endianness
  757. is also set, then the two must match.
  758. If neither a BOM is found nor endianness is set, then big
  759. endian order is assumed.
  760. """
  761. import struct, sys
  762. maxunicode = sys.maxunicode
  763. unpack = struct.unpack
  764. # Detect BOM
  765. if obj.startswith( utf32.BOM_UTF32_BE ):
  766. bom_endianness = 'B'
  767. start = len(utf32.BOM_UTF32_BE)
  768. elif obj.startswith( utf32.BOM_UTF32_LE ):
  769. bom_endianness = 'L'
  770. start = len(utf32.BOM_UTF32_LE)
  771. else:
  772. bom_endianness = None
  773. start = 0
  774. num_bytes = start
  775. if endianness == None:
  776. if bom_endianness == None:
  777. endianness = sys.byteorder.upper()[0] # Assume platform default
  778. else:
  779. endianness = bom_endianness
  780. else:
  781. endianness = endianness[0].upper()
  782. if bom_endianness and endianness != bom_endianness:
  783. raise UnicodeDecodeError('utf32',obj,0,start,'BOM does not match expected byte order')
  784. # Check for truncated last character
  785. if ((len(obj)-start) % 4) != 0:
  786. raise UnicodeDecodeError('utf32',obj,start,len(obj),
  787. 'Data length not a multiple of 4 bytes')
  788. # Start decoding characters
  789. chars = []
  790. packspec = '>L' if endianness=='B' else '<L'
  791. i = 0
  792. for i in range(start, len(obj), 4):
  793. seq = obj[i:i+4]
  794. n = unpack( packspec, seq )[0]
  795. num_bytes += 4
  796. if n > maxunicode or (0xD800 <= n <= 0xDFFF):
  797. if errors == 'strict':
  798. raise UnicodeDecodeError('utf32',obj,i,i+4,'Invalid code point U+%04X' % n)
  799. elif errors == 'replace':
  800. chars.append( unichr(0xFFFD) )
  801. elif errors == 'backslashreplace':
  802. if n > 0xffff:
  803. esc = "\\u%04x" % (n,)
  804. else:
  805. esc = "\\U%08x" % (n,)
  806. for esc_c in esc:
  807. chars.append( esc_c )
  808. elif errors == 'xmlcharrefreplace':
  809. esc = "&#%d;" % (n,)
  810. for esc_c in esc:
  811. chars.append( esc_c )
  812. else: # ignore
  813. pass
  814. else:
  815. chars.append( helpers.safe_unichr(n) )
  816. return (u''.join( chars ), num_bytes)
  817. @staticmethod
  818. def utf32le_decode( obj, errors='strict' ):
  819. """Decodes a UTF-32LE (little endian) byte string into a Unicode string."""
  820. return utf32.decode( obj, errors=errors, endianness='L' )
  821. @staticmethod
  822. def utf32be_decode( obj, errors='strict' ):
  823. """Decodes a UTF-32BE (big endian) byte string into a Unicode string."""
  824. return utf32.decode( obj, errors=errors, endianness='B' )
  825. # ----------------------------------------------------------------------
  826. # Helper functions
  827. # ----------------------------------------------------------------------
  828. def _make_unsafe_string_chars():
  829. import unicodedata
  830. unsafe = []
  831. for c in [unichr(i) for i in range(0x100)]:
  832. if c == u'"' or c == u'\\' \
  833. or unicodedata.category( c ) in ['Cc','Cf','Zl','Zp']:
  834. unsafe.append( c )
  835. return u''.join( unsafe )
  836. class helpers(object):
  837. """A set of utility functions."""
  838. hexdigits = '0123456789ABCDEFabcdef'
  839. octaldigits = '01234567'
  840. unsafe_string_chars = _make_unsafe_string_chars()
  841. import sys
  842. maxunicode = sys.maxunicode
  843. always_use_custom_codecs = False # If True use demjson's codecs
  844. # before system codecs. This
  845. # is mainly here for testing.
  846. javascript_reserved_words = frozenset([
  847. # Keywords (plus "let") (ECMAScript 6 section 11.6.2.1)
  848. 'break','case','catch','class','const','continue',
  849. 'debugger','default','delete','do','else','export',
  850. 'extends','finally','for','function','if','import',
  851. 'in','instanceof','let','new','return','super',
  852. 'switch','this','throw','try','typeof','var','void',
  853. 'while','with','yield',
  854. # Future reserved words (ECMAScript 6 section 11.6.2.2)
  855. 'enum','implements','interface','package',
  856. 'private','protected','public','static',
  857. # null/boolean literals
  858. 'null','true','false'
  859. ])
  860. @staticmethod
  861. def make_raw_bytes( byte_list ):
  862. """Constructs a byte array (bytes in Python 3, str in Python 2) from a list of byte values (0-255).
  863. """
  864. return _make_raw_bytes( byte_list )
  865. @staticmethod
  866. def is_hex_digit( c ):
  867. """Determines if the given character is a valid hexadecimal digit (0-9, a-f, A-F)."""
  868. return (c in helpers.hexdigits)
  869. @staticmethod
  870. def is_octal_digit( c ):
  871. """Determines if the given character is a valid octal digit (0-7)."""
  872. return (c in helpers.octaldigits)
  873. @staticmethod
  874. def is_binary_digit( c ):
  875. """Determines if the given character is a valid binary digit (0 or 1)."""
  876. return (c == '0' or c == '1')
  877. @staticmethod
  878. def char_is_json_ws( c ):
  879. """Determines if the given character is a JSON white-space character"""
  880. return c in ' \t\n\r'
  881. @staticmethod
  882. def safe_unichr( codepoint ):
  883. """Just like Python's unichr() but works in narrow-Unicode Pythons."""
  884. if codepoint >= 0x10000 and codepoint > helpers.maxunicode:
  885. # Narrow-Unicode python, construct a UTF-16 surrogate pair.
  886. w1, w2 = helpers.make_surrogate_pair( codepoint )
  887. if w2 is None:
  888. c = unichr(w1)
  889. else:
  890. c = unichr(w1) + unichr(w2)
  891. else:
  892. c = unichr(codepoint)
  893. return c
  894. @staticmethod
  895. def char_is_unicode_ws( c ):
  896. """Determines if the given character is a Unicode space character"""
  897. if not isinstance(c,unicode):
  898. c = unicode(c)
  899. if c in u' \t\n\r\f\v':
  900. return True
  901. import unicodedata
  902. return unicodedata.category(c) == 'Zs'
  903. @staticmethod
  904. def char_is_json_eol( c ):
  905. """Determines if the given character is a JSON line separator"""
  906. return c in '\n\r'
  907. @staticmethod
  908. def char_is_unicode_eol( c ):
  909. """Determines if the given character is a Unicode line or
  910. paragraph separator. These correspond to CR and LF as well as
  911. Unicode characters in the Zl or Zp categories.
  912. """
  913. return c in u'\r\n\u2028\u2029'
  914. @staticmethod
  915. def char_is_identifier_leader( c ):
  916. """Determines if the character may be the first character of a
  917. JavaScript identifier.
  918. """
  919. return c.isalpha() or c in '_$'
  920. @staticmethod
  921. def char_is_identifier_tail( c ):
  922. """Determines if the character may be part of a JavaScript
  923. identifier.
  924. """
  925. return c.isalnum() or c in u'_$\u200c\u200d'
  926. @staticmethod
  927. def extend_and_flatten_list_with_sep( orig_seq, extension_seq, separator='' ):
  928. for i, part in enumerate(extension_seq):
  929. if i > 0 and separator:
  930. orig_seq.append( separator )
  931. orig_seq.extend( part )
  932. @staticmethod
  933. def strip_format_control_chars( txt ):
  934. """Filters out all Unicode format control characters from the string.
  935. ECMAScript permits any Unicode "format control characters" to
  936. appear at any place in the source code. They are to be
  937. ignored as if they are not there before any other lexical
  938. tokenization occurs. Note that JSON does not allow them,
  939. except within string literals.
  940. * Ref. ECMAScript section 7.1.
  941. * http://en.wikipedia.org/wiki/Unicode_control_characters
  942. There are dozens of Format Control Characters, for example:
  943. U+00AD SOFT HYPHEN
  944. U+200B ZERO WIDTH SPACE
  945. U+2060 WORD JOINER
  946. """
  947. import unicodedata
  948. txt2 = filter( lambda c: unicodedata.category(unicode(c)) != 'Cf', txt )
  949. # 2to3 NOTE: The following is needed to work around a broken
  950. # Python3 conversion in which filter() will be transformed
  951. # into a list rather than a string.
  952. if not isinstance(txt2,basestring):
  953. txt2 = u''.join(txt2)
  954. return txt2
  955. @staticmethod
  956. def lookup_codec( encoding ):
  957. """Wrapper around codecs.lookup().
  958. Returns None if codec not found, rather than raising a LookupError.
  959. """
  960. import codecs
  961. if isinstance( encoding, codecs.CodecInfo ):
  962. return encoding
  963. encoding = encoding.lower()
  964. import codecs
  965. if helpers.always_use_custom_codecs:
  966. # Try custom utf32 first, then standard python codecs
  967. cdk = utf32.lookup(encoding)
  968. if not cdk:
  969. try:
  970. cdk = codecs.lookup( encoding )
  971. except LookupError:
  972. cdk = None
  973. else:
  974. # Try standard python codecs first, then custom utf32
  975. try:
  976. cdk = codecs.lookup( encoding )
  977. except LookupError:
  978. cdk = utf32.lookup( encoding )
  979. return cdk
  980. @staticmethod
  981. def auto_detect_encoding( s ):
  982. """Takes a string (or byte array) and tries to determine the Unicode encoding it is in.
  983. Returns the encoding name, as a string.
  984. """
  985. if not s or len(s)==0:
  986. return "utf-8"
  987. # Get the byte values of up to the first 4 bytes
  988. ords = []
  989. for i in range(0, min(len(s),4)):
  990. x = s[i]
  991. if isinstance(x, basestring):
  992. x = ord(x)
  993. ords.append( x )
  994. # Look for BOM marker
  995. import sys, codecs
  996. bom2, bom3, bom4 = None, None, None
  997. if len(s) >= 2:
  998. bom2 = s[:2]
  999. if len(s) >= 3:
  1000. bom3 = s[:3]
  1001. if len(s) >= 4:
  1002. bom4 = s[:4]
  1003. # Assign values of first four bytes to: a, b, c, d; and last byte to: z
  1004. a, b, c, d, z = None, None, None, None, None
  1005. if len(s) >= 1:
  1006. a = ords[0]
  1007. if len(s) >= 2:
  1008. b = ords[1]
  1009. if len(s) >= 3:
  1010. c = ords[2]
  1011. if len(s) >= 4:
  1012. d = ords[3]
  1013. z = s[-1]
  1014. if isinstance(z, basestring):
  1015. z = ord(z)
  1016. if bom4 and ( (hasattr(codecs,'BOM_UTF32_LE') and bom4 == codecs.BOM_UTF32_LE) or
  1017. (bom4 == utf32.BOM_UTF32_LE) ):
  1018. encoding = 'utf-32le'
  1019. s = s[4:]
  1020. elif bom4 and ( (hasattr(codecs,'BOM_UTF32_BE') and bom4 == codecs.BOM_UTF32_BE) or
  1021. (bom4 == utf32.BOM_UTF32_BE) ):
  1022. encoding = 'utf-32be'
  1023. s = s[4:]
  1024. elif bom2 and bom2 == codecs.BOM_UTF16_LE:
  1025. encoding = 'utf-16le'
  1026. s = s[2:]
  1027. elif bom2 and bom2 == codecs.BOM_UTF16_BE:
  1028. encoding = 'utf-16be'
  1029. s = s[2:]
  1030. elif bom3 and bom3 == codecs.BOM_UTF8:
  1031. encoding = 'utf-8'
  1032. s = s[3:]
  1033. # No BOM, so autodetect encoding used by looking at first four
  1034. # bytes according to RFC 4627 section 3. The first and last bytes
  1035. # in a JSON document will be ASCII. The second byte will be ASCII
  1036. # unless the first byte was a quotation mark.
  1037. elif len(s)>=4 and a==0 and b==0 and c==0 and d!=0: # UTF-32BE (0 0 0 x)
  1038. encoding = 'utf-32be'
  1039. elif len(s)>=4 and a!=0 and b==0 and c==0 and d==0 and z==0: # UTF-32LE (x 0 0 0 [... 0])
  1040. encoding = 'utf-32le'
  1041. elif len(s)>=2 and a==0 and b!=0: # UTF-16BE (0 x)
  1042. encoding = 'utf-16be'
  1043. elif len(s)>=2 and a!=0 and b==0 and z==0: # UTF-16LE (x 0 [... 0])
  1044. encoding = 'utf-16le'
  1045. elif ord('\t') <= a <= 127:
  1046. # First byte appears to be ASCII, so guess UTF-8.
  1047. encoding = 'utf8'
  1048. else:
  1049. raise ValueError("Can not determine the Unicode encoding for byte stream")
  1050. return encoding
  1051. @staticmethod
  1052. def unicode_decode( txt, encoding=None ):
  1053. """Takes a string (or byte array) and tries to convert it to a Unicode string.
  1054. Returns a named tuple: (string, codec, bom)
  1055. The 'encoding' argument, if supplied, should either the name of
  1056. a character encoding, or an instance of codecs.CodecInfo. If
  1057. the encoding argument is None or "auto" then the encoding is
  1058. automatically determined, if possible.
  1059. Any BOM (Byte Order Mark) that is found at the beginning of the
  1060. input will be stripped off and placed in the 'bom' portion of
  1061. the returned value.
  1062. """
  1063. if isinstance(txt, unicode):
  1064. res = _namedtuple('DecodedString',['string','codec','bom'])( txt, None, None )
  1065. else:
  1066. if encoding is None or encoding == 'auto':
  1067. encoding = helpers.auto_detect_encoding( txt )
  1068. cdk = helpers.lookup_codec( encoding )
  1069. if not cdk:
  1070. raise LookupError("Can not find codec for encoding %r" % encoding)
  1071. try:
  1072. # Determine if codec takes arguments; try a decode of nothing
  1073. cdk.decode( helpers.make_raw_bytes([]), errors='strict' )
  1074. except TypeError:
  1075. cdk_kw = {} # This coded doesn't like the errors argument
  1076. else:
  1077. cdk_kw = {'errors': 'strict'}
  1078. unitxt, numbytes = cdk.decode( txt, **cdk_kw ) # DO THE DECODE HERE!
  1079. # Remove BOM if present
  1080. if len(unitxt) > 0 and unitxt[0] == u'\uFEFF':
  1081. bom = cdk.encode(unitxt[0])[0]
  1082. unitxt = unitxt[1:]
  1083. elif len(unitxt) > 0 and unitxt[0] == u'\uFFFE': # Reversed BOM
  1084. raise UnicodeDecodeError(cdk.name,txt,0,0,"Wrong byte order, found reversed BOM U+FFFE")
  1085. else:
  1086. bom = None
  1087. res = _namedtuple('DecodedString',['string','codec','bom'])( unitxt, cdk, bom )
  1088. return res
  1089. @staticmethod
  1090. def surrogate_pair_as_unicode( c1, c2 ):
  1091. """Takes a pair of unicode surrogates and returns the equivalent unicode character.
  1092. The input pair must be a surrogate pair, with c1 in the range
  1093. U+D800 to U+DBFF and c2 in the range U+DC00 to U+DFFF.
  1094. """
  1095. n1, n2 = ord(c1), ord(c2)
  1096. if n1 < 0xD800 or n1 > 0xDBFF or n2 < 0xDC00 or n2 > 0xDFFF:
  1097. raise JSONDecodeError('illegal Unicode surrogate pair',(c1,c2))
  1098. a = n1 - 0xD800
  1099. b = n2 - 0xDC00
  1100. v = (a << 10) | b
  1101. v += 0x10000
  1102. return helpers.safe_unichr(v)
  1103. @staticmethod
  1104. def unicode_as_surrogate_pair( c ):
  1105. """Takes a single unicode character and returns a sequence of surrogate pairs.
  1106. The output of this function is a tuple consisting of one or two unicode
  1107. characters, such that if the input character is outside the BMP range
  1108. then the output is a two-character surrogate pair representing that character.
  1109. If the input character is inside the BMP then the output tuple will have
  1110. just a single character...the same one.
  1111. """
  1112. n = ord(c)
  1113. w1, w2 = helpers.make_surrogate_pair(n)
  1114. if w2 is None:
  1115. return (unichr(w1),)
  1116. else:
  1117. return (unichr(w1), unichr(w2))
  1118. @staticmethod
  1119. def make_surrogate_pair( codepoint ):
  1120. """Given a Unicode codepoint (int) returns a 2-tuple of surrogate codepoints."""
  1121. if codepoint < 0x10000:
  1122. return (codepoint,None) # in BMP, surrogate pair not required
  1123. v = codepoint - 0x10000
  1124. vh = (v >> 10) & 0x3ff # highest 10 bits
  1125. vl = v & 0x3ff # lowest 10 bits
  1126. w1 = 0xD800 | vh
  1127. w2 = 0xDC00 | vl
  1128. return (w1, w2)
  1129. @staticmethod
  1130. def isnumbertype( obj ):
  1131. """Is the object of a Python number type (excluding complex)?"""
  1132. return isinstance(obj, (int,long,float)) \
  1133. and not isinstance(obj, bool) \
  1134. or obj is nan or obj is inf or obj is neginf \
  1135. or (decimal and isinstance(obj, decimal.Decimal))
  1136. @staticmethod
  1137. def is_negzero( n ):
  1138. """Is the number value a negative zero?"""
  1139. if isinstance( n, float ):
  1140. return n == 0.0 and repr(n).startswith('-')
  1141. elif decimal and isinstance( n, decimal.Decimal ):
  1142. return n.is_zero() and n.is_signed()
  1143. else:
  1144. return False
  1145. @staticmethod
  1146. def is_nan( n ):
  1147. """Is the number a NaN (not-a-number)?"""
  1148. if isinstance( n, float ):
  1149. return n is nan or n.hex() == 'nan' or n != n
  1150. elif decimal and isinstance( n, decimal.Decimal ):
  1151. return n.is_nan()
  1152. else:
  1153. return False
  1154. @staticmethod
  1155. def is_infinite( n ):
  1156. """Is the number infinite?"""
  1157. if isinstance( n, float ):
  1158. return n is inf or n is neginf or n.hex() in ('inf','-inf')
  1159. elif decimal and isinstance( n, decimal.Decimal ):
  1160. return n.is_infinite()
  1161. else:
  1162. return False
  1163. @staticmethod
  1164. def isstringtype( obj ):
  1165. """Is the object of a Python string type?"""
  1166. if isinstance(obj, basestring):
  1167. return True
  1168. # Must also check for some other pseudo-string types
  1169. import types, UserString
  1170. return isinstance(obj, types.StringTypes) \
  1171. or isinstance(obj, UserString.UserString)
  1172. ## or isinstance(obj, UserString.MutableString)
  1173. @staticmethod
  1174. def decode_hex( hexstring ):
  1175. """Decodes a hexadecimal string into it's integer value."""
  1176. # We don't use the builtin 'hex' codec in python since it can
  1177. # not handle odd numbers of digits, nor raise the same type
  1178. # of exceptions we want to.
  1179. n = 0
  1180. for c in hexstring:
  1181. if '0' <= c <= '9':
  1182. d = ord(c) - ord('0')
  1183. elif 'a' <= c <= 'f':
  1184. d = ord(c) - ord('a') + 10
  1185. elif 'A' <= c <= 'F':
  1186. d = ord(c) - ord('A') + 10
  1187. else:
  1188. raise ValueError('Not a hexadecimal number', hexstring)
  1189. # Could use ((n << 4 ) | d), but python 2.3 issues a FutureWarning.
  1190. n = (n * 16) + d
  1191. return n
  1192. @staticmethod
  1193. def decode_octal( octalstring ):
  1194. """Decodes an octal string into it's integer value."""
  1195. n = 0
  1196. for c in octalstring:
  1197. if '0' <= c <= '7':
  1198. d = ord(c) - ord('0')
  1199. else:
  1200. raise ValueError('Not an octal number', octalstring)
  1201. # Could use ((n << 3 ) | d), but python 2.3 issues a FutureWarning.
  1202. n = (n * 8) + d
  1203. return n
  1204. @staticmethod
  1205. def decode_binary( binarystring ):
  1206. """Decodes a binary string into it's integer value."""
  1207. n = 0
  1208. for c in binarystring:
  1209. if c == '0':
  1210. d = 0
  1211. elif c == '1':
  1212. d = 1
  1213. else:
  1214. raise ValueError('Not an binary number', binarystring)
  1215. # Could use ((n << 3 ) | d), but python 2.3 issues a FutureWarning.
  1216. n = (n * 2) + d
  1217. return n
  1218. @staticmethod
  1219. def format_timedelta_iso( td ):
  1220. """Encodes a datetime.timedelta into ISO-8601 Time Period format.
  1221. """
  1222. d = td.days
  1223. s = td.seconds
  1224. ms = td.microseconds
  1225. m, s = divmod(s,60)
  1226. h, m = divmod(m,60)
  1227. a = ['P']
  1228. if d:
  1229. a.append( '%dD' % d )
  1230. if h or m or s or ms:
  1231. a.append( 'T' )
  1232. if h:
  1233. a.append( '%dH' % h )
  1234. if m:
  1235. a.append( '%dM' % m )
  1236. if s or ms:
  1237. if ms:
  1238. a.append( '%d.%06d' % (s,ms) )
  1239. else:
  1240. a.append( '%d' % s )
  1241. if len(a)==1:
  1242. a.append('T0S')
  1243. return ''.join(a)
  1244. # ----------------------------------------------------------------------
  1245. # File position indicator
  1246. # ----------------------------------------------------------------------
  1247. class position_marker(object):
  1248. """A position marks a specific place in a text document.
  1249. It consists of the following attributes:
  1250. * line - The line number, starting at 1
  1251. * column - The column on the line, starting at 0
  1252. * char_position - The number of characters from the start of
  1253. the document, starting at 0
  1254. * text_after - (optional) a short excerpt of the text of
  1255. document starting at the current position
  1256. Lines are separated by any Unicode line separator character. As an
  1257. exception a CR+LF character pair is treated as being a single line
  1258. separator demarcation.
  1259. Columns are simply a measure of the number of characters after the
  1260. start of a new line, starting at 0. Visual effects caused by
  1261. Unicode characters such as combining characters, bidirectional
  1262. text, zero-width characters and so on do not affect the
  1263. computation of the column regardless of visual appearance.
  1264. The char_position is a count of the number of characters since the
  1265. beginning of the document, starting at 0. As used within the
  1266. buffered_stream class, if the document starts with a Unicode Byte
  1267. Order Mark (BOM), the BOM prefix is NOT INCLUDED in the count.
  1268. """
  1269. def __init__(self, offset=0, line=1, column=0, text_after=None):
  1270. self.__char_position = offset
  1271. self.__line = line
  1272. self.__column = column
  1273. self.__text_after = text_after
  1274. self.__at_end = False
  1275. self.__last_was_cr = False
  1276. @property
  1277. def line(self):
  1278. """The current line within the document, starts at 1."""
  1279. return self.__line
  1280. @property
  1281. def column(self):
  1282. """The current character column from the beginning of the
  1283. document, starts at 0.
  1284. """
  1285. return self.__column
  1286. @property
  1287. def char_position(self):
  1288. """The current character offset from the beginning of the
  1289. document, starts at 0.
  1290. """
  1291. return self.__char_position
  1292. @property
  1293. def at_start(self):
  1294. """Returns True if the position is at the start of the document."""
  1295. return (self.char_position == 0)
  1296. @property
  1297. def at_end(self):
  1298. """Returns True if the position is at the end of the document.
  1299. This property must be set by the user.
  1300. """
  1301. return self.__at_end
  1302. @at_end.setter
  1303. def at_end(self, b):
  1304. """Sets the at_end property to True or False.
  1305. """
  1306. self.__at_end = bool(b)
  1307. @property
  1308. def text_after(self):
  1309. """Returns a textual excerpt starting at the current position.
  1310. This property must be set by the user.
  1311. """
  1312. return self.__at_end
  1313. @text_after.setter
  1314. def text_after(self, value):
  1315. """Sets the text_after property to a given string.
  1316. """
  1317. self.__text_after = value
  1318. def __repr__(self):
  1319. s = "%s(offset=%r,line=%r,column=%r" \
  1320. % (self.__class__.__name__,
  1321. self.__char_position,
  1322. self.__line,
  1323. self.__column)
  1324. if self.text_after:
  1325. s += ",text_after=%r" % (self.text_after,)
  1326. s += ")"
  1327. return s
  1328. def describe(self, show_text=True):
  1329. """Returns a human-readable description of the position, in English."""
  1330. s = "line %d, column %d, offset %d" % (self.__line,
  1331. self.__column,
  1332. self.__char_position)
  1333. if self.at_start:
  1334. s += " (AT-START)"
  1335. elif self.at_end:
  1336. s += " (AT-END)"
  1337. if show_text and self.text_after:
  1338. s += ", text %r" % (self.text_after)
  1339. return s
  1340. def __str__(self):
  1341. """Same as the describe() function."""
  1342. return self.describe( show_text=True )
  1343. def copy( self ):
  1344. """Create a copy of the position object."""
  1345. p = self.__class__()
  1346. p.__char_position = self.__char_position
  1347. p.__line = self.__line
  1348. p.__column = self.__column
  1349. p.text_after = self.__text_after
  1350. p.at_end = self.at_end
  1351. p.__last_was_cr = self.__last_was_cr
  1352. return p
  1353. def rewind( self ):
  1354. """Set the position to the start of the document."""
  1355. if not self.at_start:
  1356. self.text_after = None
  1357. self.at_end = False
  1358. self.__char_position = 0
  1359. self.__line = 1
  1360. self.__column = 0
  1361. self.__last_was_cr = False
  1362. def advance( self, s ):
  1363. """Advance the position from its current place according to
  1364. the given string of characters.
  1365. """
  1366. if s:
  1367. self.text_after = None
  1368. for c in s:
  1369. self.__char_position += 1
  1370. if c == '\n' and self.__last_was_cr:
  1371. self.__last_was_cr = False
  1372. elif helpers.char_is_unicode_eol(c):
  1373. self.__line += 1
  1374. self.__column = 0
  1375. self.__last_was_cr = (c == '\r')
  1376. else:
  1377. self.__column += 1
  1378. self.__last_was_cr = False
  1379. # ----------------------------------------------------------------------
  1380. # Buffered Stream Reader
  1381. # ----------------------------------------------------------------------
  1382. class buffered_stream(object):
  1383. """A helper class for the JSON parser.
  1384. It allows for reading an input document, while handling some
  1385. low-level Unicode issues as well as tracking the current position
  1386. in terms of line and column position.
  1387. """
  1388. def __init__(self, txt='', encoding=None):
  1389. self.reset()
  1390. self.set_text( txt, encoding )
  1391. def reset(self):
  1392. """Clears the state to nothing."""
  1393. self.__pos = position_marker()
  1394. self.__saved_pos = [] # Stack of saved positions
  1395. self.__bom = helpers.make_raw_bytes([]) # contains copy of byte-order mark, if any
  1396. self.__codec = None # The CodecInfo
  1397. self.__encoding = None # The name of the codec's encoding
  1398. self.__input_is_bytes = False
  1399. self.__rawbuf = None
  1400. self.__raw_bytes = None
  1401. self.__cmax = 0
  1402. self.num_ws_skipped = 0
  1403. def save_position(self):
  1404. self.__saved_pos.append( self.__pos.copy() )
  1405. return True
  1406. def clear_saved_position(self):
  1407. if self.__saved_pos:
  1408. self.__saved_pos.pop()
  1409. return True
  1410. else:
  1411. return False
  1412. def restore_position(self):
  1413. try:
  1414. old_pos = self.__saved_pos.pop() # Can raise IndexError
  1415. except IndexError, err:
  1416. raise IndexError("Attempt to restore buffer position that was never saved")
  1417. else:
  1418. self.__pos = old_pos
  1419. return True
  1420. def _find_codec(self, encoding):
  1421. if encoding is None:
  1422. self.__codec = None
  1423. self.__encoding = None
  1424. elif isinstance(encoding, codecs.CodecInfo):
  1425. self.__codec = encoding
  1426. self.__encoding = self.__codec.name
  1427. else:
  1428. self.__encoding = encoding
  1429. self.__codec = helpers.lookup_codec( encoding )
  1430. if not self.__codec:
  1431. raise JSONDecodeError('no codec available for character encoding',encoding)
  1432. return self.__codec
  1433. def set_text( self, txt, encoding=None ):
  1434. """Changes the input text document and rewinds the position to
  1435. the start of the new document.
  1436. """
  1437. import sys
  1438. self.rewind()
  1439. self.__codec = None
  1440. self.__bom = None
  1441. self.__rawbuf = u''
  1442. self.__cmax = 0 # max number of chars in input
  1443. try:
  1444. decoded = helpers.unicode_decode( txt, encoding )
  1445. except JSONError:
  1446. raise
  1447. except Exception, err:
  1448. # Re-raise as a JSONDecodeError
  1449. e2 = sys.exc_info()
  1450. newerr = JSONDecodeError("a Unicode decoding error occurred")
  1451. # Simulate Python 3's: "raise X from Y" exception chaining
  1452. newerr.__cause__ = err
  1453. newerr.__traceback__ = e2[2]
  1454. raise newerr
  1455. else:
  1456. self.__codec = decoded.codec
  1457. self.__bom = decoded.bom
  1458. self.__rawbuf = decoded.string
  1459. self.__cmax = len(self.__rawbuf)
  1460. def __repr__(self):
  1461. return '<%s at %r text %r>' % (self.__class__.__name__, self.__pos, self.text_context)
  1462. def rewind(self):
  1463. """Resets the position back to the start of the input text."""
  1464. self.__pos.rewind()
  1465. @property
  1466. def codec(self):
  1467. """The codec object used to perform Unicode decoding, or None."""
  1468. return self.__codec
  1469. @property
  1470. def bom(self):
  1471. """The Unicode Byte-Order Mark (BOM), if any, that was present
  1472. at the start of the input text. The returned BOM is a string
  1473. of the raw bytes, and is not Unicode-decoded.
  1474. """
  1475. return self.__bom
  1476. @property
  1477. def cpos(self):
  1478. """The current character offset from the start of the document."""
  1479. return self.__pos.char_position
  1480. @property
  1481. def position(self):
  1482. """The current position (as a position_marker object).
  1483. Returns a copy.
  1484. """
  1485. p = self.__pos.copy()
  1486. p.text_after = self.text_context
  1487. p.at_end = self.at_end
  1488. return p
  1489. @property
  1490. def at_start(self):
  1491. """Returns True if the position is currently at the start of
  1492. the document, or False otherwise.
  1493. """
  1494. return self.__pos.at_start
  1495. @property
  1496. def at_end(self):
  1497. """Returns True if the position is currently at the end of the
  1498. document, of False otherwise.
  1499. """
  1500. c = self.peek()
  1501. return (not c)
  1502. def at_ws(self, allow_unicode_whitespace=True):
  1503. """Returns True if the current position contains a white-space
  1504. character.
  1505. """
  1506. c = self.peek()
  1507. if not c:
  1508. return False
  1509. elif allow_unicode_whitespace:
  1510. return helpers.char_is_unicode_ws(c)
  1511. else:
  1512. return helpers.char_is_json_ws(c)
  1513. def at_eol(self, allow_unicode_eol=True):
  1514. """Returns True if the current position contains an
  1515. end-of-line control character.
  1516. """
  1517. c = self.peek()
  1518. if not c:
  1519. return True # End of file is treated as end of line
  1520. elif allow_unicode_eol:
  1521. return helpers.char_is_unicode_eol(c)
  1522. else:
  1523. return helpers.char_is_json_eol(c)
  1524. def peek( self, offset=0 ):
  1525. """Returns the character at the current position, or at a
  1526. given offset away from the current position. If the position
  1527. is beyond the limits of the document size, then an empty
  1528. string '' is returned.
  1529. """
  1530. i = self.cpos + offset
  1531. if i < 0 or i >= self.__cmax:
  1532. return ''
  1533. return self.__rawbuf[i]
  1534. def peekstr( self, span=1, offset=0 ):
  1535. """Returns one or more characters starting at the current
  1536. position, or at a given offset away from the current position,
  1537. and continuing for the given span length. If the offset and
  1538. span go outside the limit of the current document size, then
  1539. the returned string may be shorter than the requested span
  1540. length.
  1541. """
  1542. i = self.cpos + offset
  1543. j = i + span
  1544. if i < 0 or i >= self.__cmax:
  1545. return ''
  1546. return self.__rawbuf[i : j]
  1547. @property
  1548. def text_context( self, context_size = 20 ):
  1549. """A short human-readable textual excerpt of the document at
  1550. the current position, in English.
  1551. """
  1552. context_size = max( context_size, 4 )
  1553. s = self.peekstr(context_size + 1)
  1554. if not s:
  1555. return ''
  1556. if len(s) > context_size:
  1557. s = s[:context_size - 3] + "..."
  1558. return s
  1559. def startswith( self, s ):
  1560. """Determines if the text at the current position starts with
  1561. the given string.
  1562. See also method: pop_if_startswith()
  1563. """
  1564. s2 = self.peekstr( len(s) )
  1565. return s == s2
  1566. def skip( self, span=1 ):
  1567. """Advances the current position by one (or the given number)
  1568. of characters. Will not advance beyond the end of the
  1569. document. Returns the number of characters skipped.
  1570. """
  1571. i = self.cpos
  1572. self.__pos.advance( self.peekstr(span) )
  1573. return self.cpos - i
  1574. def skipuntil( self, testfn ):
  1575. """Advances the current position until a given predicate test
  1576. function succeeds, or the end of the document is reached.
  1577. Returns the actual number of characters skipped.
  1578. The provided test function should take a single unicode
  1579. character and return a boolean value, such as:
  1580. lambda c : c == '.' # Skip to next period
  1581. See also methods: skipwhile() and popuntil()
  1582. """
  1583. i = self.cpos
  1584. while True:
  1585. c = self.peek()
  1586. if not c or testfn(c):
  1587. break
  1588. else:
  1589. self.__pos.advance(c)
  1590. return self.cpos - i
  1591. def skipwhile( self, testfn ):
  1592. """Advances the current position until a given predicate test
  1593. function fails, or the end of the document is reached.
  1594. Returns the actual number of characters skipped.
  1595. The provided test function should take a single unicode
  1596. character and return a boolean value, such as:
  1597. lambda c : c.isdigit() # Skip all digits
  1598. See also methods: skipuntil() and popwhile()
  1599. """
  1600. return self.skipuntil( lambda c: not testfn(c) )
  1601. def skip_to_next_line( self, allow_unicode_eol=True ):
  1602. """Advances the current position to the start of the next
  1603. line. Will not advance beyond the end of the file. Note that
  1604. the two-character sequence CR+LF is recognized as being just a
  1605. single end-of-line marker.
  1606. """
  1607. ln = self.__pos.line
  1608. while True:
  1609. c = self.pop()
  1610. if not c or self.__pos.line > ln:
  1611. if c == '\r' and self.peek() == '\n':
  1612. self.skip()
  1613. break
  1614. def skipws( self, allow_unicode_whitespace=True ):
  1615. """Advances the current position past all whitespace, or until
  1616. the end of the document is reached.
  1617. """
  1618. if allow_unicode_whitespace:
  1619. n = self.skipwhile( helpers.char_is_unicode_ws )
  1620. else:
  1621. n = self.skipwhile( helpers.char_is_json_ws )
  1622. self.num_ws_skipped += n
  1623. return n
  1624. def pop( self ):
  1625. """Returns the character at the current position and advances
  1626. the position to the next character. At the end of the
  1627. document this function returns an empty string.
  1628. """
  1629. c = self.peek()
  1630. if c:
  1631. self.__pos.advance( c )
  1632. return c
  1633. def popstr( self, span=1, offset=0 ):
  1634. """Returns a string of one or more characters starting at the
  1635. current position, and advances the position to the following
  1636. character after the span. Will not go beyond the end of the
  1637. document, so the returned string may be shorter than the
  1638. requested span.
  1639. """
  1640. s = self.peekstr(span)
  1641. if s:
  1642. self.__pos.advance( s )
  1643. return s
  1644. def popif( self, testfn ):
  1645. """Just like the pop() function, but only returns the
  1646. character if the given predicate test function succeeds.
  1647. """
  1648. c = self.peek()
  1649. if c and testfn(c):
  1650. self.__pos.advance( c )
  1651. return c
  1652. return ''
  1653. def pop_while_in( self, chars ):
  1654. """Pops a sequence of characters at the current position
  1655. as long as each of them is in the given set of characters.
  1656. """
  1657. if not isinstance( chars, (set,frozenset)):
  1658. cset = set( chars )
  1659. c = self.peek()
  1660. if c and c in cset:
  1661. s = self.popwhile( lambda c: c and c in cset )
  1662. return s
  1663. return None
  1664. def pop_identifier( self, match=None ):
  1665. """Pops the sequence of characters at the current position
  1666. that match the syntax for a JavaScript identifier.
  1667. """
  1668. c = self.peek()
  1669. if c and helpers.char_is_identifier_leader(c):
  1670. s = self.popwhile( helpers.char_is_identifier_tail )
  1671. return s
  1672. return None
  1673. def pop_if_startswith( self, s ):
  1674. """Pops the sequence of characters if they match the given string.
  1675. See also method: startswith()
  1676. """
  1677. s2 = self.peekstr( len(s) )
  1678. if s2 != s:
  1679. return NULL
  1680. self.__pos.advance( s2 )
  1681. return s2
  1682. def popwhile( self, testfn, maxchars=None ):
  1683. """Pops all the characters starting at the current position as
  1684. long as each character passes the given predicate function
  1685. test. If maxchars a numeric value instead of None then then
  1686. no more than that number of characters will be popped
  1687. regardless of the predicate test.
  1688. See also methods: skipwhile() and popuntil()
  1689. """
  1690. s = []
  1691. i = 0
  1692. while maxchars is None or i < maxchars:
  1693. c = self.popif( testfn )
  1694. if not c:
  1695. break
  1696. s.append( c )
  1697. i += 1
  1698. return ''.join(s)
  1699. def popuntil( self, testfn, maxchars=None ):
  1700. """Just like popwhile() method except the predicate function
  1701. should return True to stop the sequence rather than False.
  1702. See also methods: skipuntil() and popwhile()
  1703. """
  1704. return popwhile( lambda c: not testfn(c), maxchars=maxchars )
  1705. def __getitem__( self, index ):
  1706. """Returns the character at the given index relative to the current position.
  1707. If the index goes beyond the end of the input, or prior to the
  1708. start when negative, then '' is returned.
  1709. If the index provided is a slice object, then that range of
  1710. characters is returned as a string. Note that a stride value other
  1711. than 1 is not supported in the slice. To use a slice, do:
  1712. s = my_stream[ 1:4 ]
  1713. """
  1714. if isinstance( index, slice ):
  1715. return self.peekstr( index.stop - index.start, index.start )
  1716. else:
  1717. return self.peek( index )
  1718. # ----------------------------------------------------------------------
  1719. # Exception classes.
  1720. # ----------------------------------------------------------------------
  1721. class JSONException(Exception):
  1722. """Base class for all JSON-related exceptions.
  1723. """
  1724. pass
  1725. class JSONSkipHook(JSONException):
  1726. """An exception to be raised by user-defined code within hook
  1727. callbacks to indicate the callback does not want to handle the
  1728. situation.
  1729. """
  1730. pass
  1731. class JSONStopProcessing(JSONException):
  1732. """Can be raised by anyplace, including inside a hook function, to
  1733. cause the entire encode or decode process to immediately stop
  1734. with an error.
  1735. """
  1736. pass
  1737. class JSONAbort(JSONException):
  1738. pass
  1739. class JSONError(JSONException):
  1740. """Base class for all JSON-related errors.
  1741. In addition to standard Python exceptions, these exceptions may
  1742. also have additional properties:
  1743. * severity - One of: 'fatal', 'error', 'warning', 'info'
  1744. * position - An indication of the position in the input where the error occured.
  1745. * outer_position - A secondary position (optional) that gives
  1746. the location of the outer data item in which the error
  1747. occured, such as the beginning of a string or an array.
  1748. * context_description - A string that identifies the context
  1749. in which the error occured. Default is "Context".
  1750. """
  1751. severities = frozenset(['fatal','error','warning','info'])
  1752. def __init__(self, message, *args, **kwargs ):
  1753. self.severity = 'error'
  1754. self._position = None
  1755. self.outer_position = None
  1756. self.context_description = None
  1757. for kw,val in kwargs.items():
  1758. if kw == 'severity':
  1759. if val not in self.severities:
  1760. raise TypeError("%s given invalid severity %r" % (self.__class__.__name__, val))
  1761. self.severity = val
  1762. elif kw == 'position':
  1763. self.position = val
  1764. elif kw == 'outer_position':
  1765. self.outer_position = val
  1766. elif kw == 'context_description' or kw=='context':
  1767. self.context_description = val
  1768. else:
  1769. raise TypeError("%s does not accept %r keyword argument" % (self.__class__.__name__, kw))
  1770. super( JSONError, self ).__init__( message, *args )
  1771. self.message = message
  1772. @property
  1773. def position(self):
  1774. return self._position
  1775. @position.setter
  1776. def position(self, pos):
  1777. if pos == 0:
  1778. self._position = 0 #position_marker() # start of input
  1779. else:
  1780. self._position = pos
  1781. def __repr__(self):
  1782. s = "%s(%r" % (self.__class__.__name__, self.message)
  1783. for a in self.args[1:]:
  1784. s += ", %r" % (a,)
  1785. if self.position:
  1786. s += ", position=%r" % (self.position,)
  1787. if self.outer_position:
  1788. s += ", outer_position=%r" % (self.outer_position,)
  1789. s += ", severity=%r)" % (self.severity,)
  1790. return s
  1791. def pretty_description(self, show_positions=True, filename=None):
  1792. if filename:
  1793. pfx = filename.rstrip().rstrip(':') + ':'
  1794. else:
  1795. pfx = ''
  1796. # Print file position as numeric abbreviation
  1797. err = pfx
  1798. if self.position == 0:
  1799. err += '0:0:'
  1800. elif self.position:
  1801. err += '%d:%d:' % (self.position.line, self.position.column)
  1802. else:
  1803. err += ' '
  1804. # Print severity and main error message
  1805. err += " %s: %s" % (self.severity.capitalize(), self.message)
  1806. if len(self.args) > 1:
  1807. err += ': '
  1808. for anum, a in enumerate(self.args[1:]):
  1809. if anum > 1:
  1810. err += ', '
  1811. astr = repr(a)
  1812. if len(astr) > 30:
  1813. astr = astr[:30] + '...'
  1814. err += astr
  1815. # Print out exception chain
  1816. e2 = self
  1817. while e2:
  1818. if hasattr(e2,'__cause__') and isinstance(e2.__cause__,Exception):
  1819. e2 = e2.__cause__
  1820. e2desc = str(e2).strip()
  1821. if not e2desc:
  1822. e2desc = repr(e2).strip()
  1823. err += "\n | Cause: %s" % e2desc.strip().replace('\n','\n | ')
  1824. else:
  1825. e2 = None
  1826. # Show file position
  1827. if show_positions and self.position is not None:
  1828. if self.position == 0:
  1829. err += "\n | At start of input"
  1830. else:
  1831. err += "\n | At %s" % (self.position.describe(show_text=False),)
  1832. if self.position.text_after:
  1833. err += "\n | near text: %r" % (self.position.text_after,)
  1834. # Show context
  1835. if show_positions and self.outer_position:
  1836. if self.context_description:
  1837. cdesc = self.context_description.capitalize()
  1838. else:
  1839. cdesc = "Context"
  1840. err += "\n | %s started at %s" % (cdesc, self.outer_position.describe(show_text=False),)
  1841. if self.outer_position.text_after:
  1842. err += "\n | with text: %r" % (self.outer_position.text_after,)
  1843. return err
  1844. class JSONDecodeError(JSONError):
  1845. """An exception class raised when a JSON decoding error (syntax error) occurs."""
  1846. pass
  1847. class JSONDecodeHookError(JSONDecodeError):
  1848. """An exception that occured within a decoder hook.
  1849. The original exception is available in the 'hook_exception' attribute.
  1850. """
  1851. def __init__(self, hook_name, exc_info, encoded_obj, *args, **kwargs):
  1852. self.hook_name = hook_name
  1853. if not exc_info:
  1854. exc_info = (None, None, None)
  1855. exc_type, self.hook_exception, self.hook_traceback = exc_info
  1856. self.object_type = type(encoded_obj)
  1857. msg = "Hook %s raised %r while decoding type <%s>" % (hook_name, self.hook_exception.__class__.__name__, self.object_type.__name__)
  1858. if len(args) >= 1:
  1859. msg += ": " + args[0]
  1860. args = args[1:]
  1861. super(JSONDecodeHookError,self).__init__(msg, *args,**kwargs)
  1862. class JSONEncodeError(JSONError):
  1863. """An exception class raised when a python object can not be encoded as a JSON string."""
  1864. pass
  1865. class JSONEncodeHookError(JSONEncodeError):
  1866. """An exception that occured within an encoder hook.
  1867. The original exception is available in the 'hook_exception' attribute.
  1868. """
  1869. def __init__(self, hook_name, exc_info, encoded_obj, *args, **kwargs):
  1870. self.hook_name = hook_name
  1871. if not exc_info:
  1872. exc_info = (None, None, None)
  1873. exc_type, self.hook_exception, self.hook_traceback = exc_info
  1874. self.object_type = type(encoded_obj)
  1875. msg = "Hook %s raised %r while encoding type <%s>" % (self.hook_name, self.hook_exception.__class__.__name__, self.object_type.__name__)
  1876. if len(args) >= 1:
  1877. msg += ": " + args[0]
  1878. args = args[1:]
  1879. super(JSONEncodeHookError,self).__init__(msg, *args, **kwargs)
  1880. #----------------------------------------------------------------------
  1881. # Encoder state object
  1882. #----------------------------------------------------------------------
  1883. class encode_state(object):
  1884. """An internal transient object used during JSON encoding to
  1885. record the current construction state.
  1886. """
  1887. def __init__(self, jsopts=None, parent=None ):
  1888. import sys
  1889. self.chunks = []
  1890. if not parent:
  1891. self.parent = None
  1892. self.nest_level = 0
  1893. self.options = jsopts
  1894. self.escape_unicode_test = False # or a function f(unichar)=>True/False
  1895. else:
  1896. self.parent = parent
  1897. self.nest_level = parent.nest_level + 1
  1898. self.escape_unicode_test = parent.escape_unicode_test
  1899. self.options = parent.options
  1900. def make_substate(self):
  1901. return encode_state( parent=self )
  1902. def join_substate(self, other_state):
  1903. self.chunks.extend( other_state.chunks )
  1904. other_state.chunks = []
  1905. def append(self, s):
  1906. """Adds a string to the end of the current JSON document"""
  1907. self.chunks.append(s)
  1908. def combine(self):
  1909. """Returns the accumulated string and resets the state to empty"""
  1910. s = ''.join( self.chunks )
  1911. self.chunks = []
  1912. return s
  1913. def __eq__(self, other_state):
  1914. return self.nest_level == other_state.nest_level and \
  1915. self.chunks == other_state.chunks
  1916. def __lt__(self, other_state):
  1917. if self.nest_level != other_state.nest_level:
  1918. return self.nest_level < other_state.nest_level
  1919. return self.chunks < other_state.chunks
  1920. #----------------------------------------------------------------------
  1921. # Decoder statistics
  1922. #----------------------------------------------------------------------
  1923. class decode_statistics(object):
  1924. """An object that records various statistics about a decoded JSON document.
  1925. """
  1926. int8_max = 0x7f
  1927. int8_min = - 0x7f - 1
  1928. int16_max = 0x7fff
  1929. int16_min = - 0x7fff - 1
  1930. int32_max = 0x7fffffff
  1931. int32_min = - 0x7fffffff - 1
  1932. int64_max = 0x7fffffffffffffff
  1933. int64_min = - 0x7fffffffffffffff - 1
  1934. double_int_max = 2**53 - 1
  1935. double_int_min = - (2**53 - 1)
  1936. def __init__(self):
  1937. # Nesting
  1938. self.max_depth = 0
  1939. self.max_items_in_array = 0
  1940. self.max_items_in_object = 0
  1941. # Integer stats
  1942. self.num_ints = 0
  1943. self.num_ints_8bit = 0
  1944. self.num_ints_16bit = 0
  1945. self.num_ints_32bit = 0
  1946. self.num_ints_53bit = 0 # ints which will overflow IEEE doubles
  1947. self.num_ints_64bit = 0
  1948. self.num_ints_long = 0
  1949. self.num_negative_zero_ints = 0
  1950. # Floating-point stats
  1951. self.num_negative_zero_floats = 0
  1952. self.num_floats = 0
  1953. self.num_floats_decimal = 0 # overflowed 'float'
  1954. # String stats
  1955. self.num_strings = 0
  1956. self.max_string_length = 0
  1957. self.total_string_length = 0
  1958. self.min_codepoint = None
  1959. self.max_codepoint = None
  1960. # Other data type stats
  1961. self.num_arrays = 0
  1962. self.num_objects = 0
  1963. self.num_bools = 0
  1964. self.num_nulls = 0
  1965. self.num_undefineds = 0
  1966. self.num_nans = 0
  1967. self.num_infinities = 0
  1968. self.num_comments = 0
  1969. self.num_identifiers = 0 # JavaScript identifiers
  1970. self.num_excess_whitespace = 0
  1971. @property
  1972. def num_infinites(self):
  1973. """Misspelled 'num_infinities' for backwards compatibility"""
  1974. return self.num_infinities
  1975. def pretty_description(self, prefix=''):
  1976. import unicodedata
  1977. lines = [
  1978. "Number of integers:",
  1979. " 8-bit: %5d (%d to %d)" % (self.num_ints_8bit, self.int8_min, self.int8_max),
  1980. " 16-bit: %5d (%d to %d)" % (self.num_ints_16bit, self.int16_min, self.int16_max),
  1981. " 32-bit: %5d (%d to %d)" % (self.num_ints_32bit, self.int32_min, self.int32_max),
  1982. " > 53-bit: %5d (%d to %d - overflows JavaScript)" % (self.num_ints_53bit, self.double_int_min, self.double_int_max),
  1983. " 64-bit: %5d (%d to %d)" % (self.num_ints_64bit, self.int64_min, self.int64_max),
  1984. " > 64 bit: %5d (not portable, may require a \"Big Num\" package)" % self.num_ints_long,
  1985. " total ints: %5d" % self.num_ints,
  1986. " Num -0: %5d (negative-zero integers are not portable)" % self.num_negative_zero_ints,
  1987. "Number of floats:",
  1988. " doubles: %5d" % self.num_floats,
  1989. " > doubles: %5d (will overflow IEEE doubles)" % self.num_floats_decimal,
  1990. " total flts: %5d" % (self.num_floats + self.num_floats_decimal),
  1991. " Num -0.0: %5d (negative-zero floats are usually portable)" % self.num_negative_zero_floats,
  1992. "Number of:",
  1993. " nulls: %5d" % self.num_nulls,
  1994. " booleans: %5d" % self.num_bools,
  1995. " arrays: %5d" % self.num_arrays,
  1996. " objects: %5d" % self.num_objects,
  1997. "Strings:",
  1998. " number: %5d strings" % self.num_strings,
  1999. " max length: %5d characters" % self.max_string_length,
  2000. " total chars: %5d across all strings" % self.total_string_length,
  2001. ]
  2002. if self.min_codepoint is not None:
  2003. cp = 'U+%04X' % self.min_codepoint
  2004. try:
  2005. charname = unicodedata.name(unichr(self.min_codepoint))
  2006. except ValueError:
  2007. charname = '? UNKNOWN CHARACTER'
  2008. lines.append(" min codepoint: %6s (%s)" % (cp, charname))
  2009. else:
  2010. lines.append(" min codepoint: %6s" % ('n/a',))
  2011. if self.max_codepoint is not None:
  2012. cp = 'U+%04X' % self.max_codepoint
  2013. try:
  2014. charname = unicodedata.name(unichr(self.max_codepoint))
  2015. except ValueError:
  2016. charname = '? UNKNOWN CHARACTER'
  2017. lines.append(" max codepoint: %6s (%s)" % (cp, charname))
  2018. else:
  2019. lines.append(" max codepoint: %6s" % ('n/a',))
  2020. lines.extend([
  2021. "Other JavaScript items:",
  2022. " NaN: %5d" % self.num_nans,
  2023. " Infinite: %5d" % self.num_infinities,
  2024. " undefined: %5d" % self.num_undefineds,
  2025. " Comments: %5d" % self.num_comments,
  2026. " Identifiers: %5d" % self.num_identifiers,
  2027. "Max items in any array: %5d" % self.max_items_in_array,
  2028. "Max keys in any object: %5d" % self.max_items_in_object,
  2029. "Max nesting depth: %5d" % self.max_depth,
  2030. ])
  2031. if self.total_chars == 0:
  2032. lines.append("Unnecessary whitespace: 0 of 0 characters")
  2033. else:
  2034. lines.append(
  2035. "Unnecessary whitespace: %5d of %d characters (%.2f%%)" \
  2036. % (self.num_excess_whitespace, self.total_chars,
  2037. self.num_excess_whitespace * 100.0 / self.total_chars) )
  2038. if prefix:
  2039. return '\n'.join([ prefix+s for s in lines ]) + '\n'
  2040. else:
  2041. return '\n'.join( lines ) + '\n'
  2042. #----------------------------------------------------------------------
  2043. # Decoder state object
  2044. #----------------------------------------------------------------------
  2045. class decode_state(object):
  2046. """An internal transient object used during JSON decoding to
  2047. record the current parsing state and error messages.
  2048. """
  2049. def __init__(self, options=None):
  2050. self.reset()
  2051. self.options = options
  2052. def reset(self):
  2053. """Clears all errors, statistics, and input text."""
  2054. self.buf = None
  2055. self.errors = []
  2056. self.obj = None
  2057. self.cur_depth = 0 # how deep in nested structures are we?
  2058. self.stats = decode_statistics()
  2059. self._have_warned_nonbmp = False
  2060. self._have_warned_long_string = False
  2061. self._have_warned_max_depth = False
  2062. @property
  2063. def should_stop(self):
  2064. if self.has_fatal:
  2065. return True
  2066. return False
  2067. @property
  2068. def has_errors(self):
  2069. """Have any errors been seen already?"""
  2070. return len([err for err in self.errors if err.severity in ('fatal','error')]) > 0
  2071. @property
  2072. def has_fatal(self):
  2073. """Have any errors been seen already?"""
  2074. return len([err for err in self.errors if err.severity in ('fatal',)]) > 0
  2075. def set_input( self, txt, encoding=None ):
  2076. """Initialize the state by setting the input document text."""
  2077. import sys
  2078. self.reset()
  2079. try:
  2080. self.buf = buffered_stream( txt, encoding=encoding )
  2081. except JSONError as err:
  2082. err.position = 0 # set position to start of file
  2083. err.severity = 'fatal'
  2084. self.push_exception( err )
  2085. except Exception as err:
  2086. # Re-raise as JSONDecodeError
  2087. e2 = sys.exc_info()
  2088. newerr = JSONDecodeError("Error while reading input", position=0, severity='fatal')
  2089. self.push_exception( err )
  2090. self.buf = None
  2091. else:
  2092. if self.buf.bom:
  2093. self.push_cond( self.options.bom,
  2094. "JSON document was prefixed by a BOM (Byte Order Mark)",
  2095. self.buf.bom )
  2096. if not self.buf:
  2097. self.push_fatal( "Aborting, can not read JSON document.", position=0 )
  2098. def push_exception(self, exc):
  2099. """Add an already-built exception to the error list."""
  2100. self.errors.append(exc)
  2101. def push_fatal(self, message, *args, **kwargs):
  2102. """Create a fatal error."""
  2103. kwargs['severity'] = 'fatal'
  2104. self.__push_err( message, *args, **kwargs)
  2105. def push_error(self, message, *args, **kwargs):
  2106. """Create an error."""
  2107. kwargs['severity'] = 'error'
  2108. self.__push_err( message, *args, **kwargs)
  2109. def push_warning(self, message, *args, **kwargs):
  2110. """Create a warning."""
  2111. kwargs['severity'] = 'warning'
  2112. self.__push_err( message, *args, **kwargs)
  2113. def push_info(self, message, *args, **kwargs):
  2114. """Create a informational message."""
  2115. kwargs['severity'] = 'info'
  2116. self.__push_err( message, *args, **kwargs)
  2117. def push_cond(self, behavior_value, message, *args, **kwargs):
  2118. """Creates an conditional error or warning message.
  2119. The behavior value (from json_options) controls whether
  2120. a message will be pushed and whether it is an error
  2121. or warning message.
  2122. """
  2123. if behavior_value == ALLOW:
  2124. return
  2125. elif behavior_value == WARN:
  2126. kwargs['severity'] = 'warning'
  2127. else:
  2128. kwargs['severity'] = 'error'
  2129. self.__push_err( message, *args, **kwargs )
  2130. def __push_err(self, message, *args, **kwargs):
  2131. """Stores an error in the error list."""
  2132. position = None
  2133. outer_position = None
  2134. severity = 'error'
  2135. context_description = None
  2136. for kw, val in kwargs.items():
  2137. if kw == 'position': position = val
  2138. elif kw == 'outer_position': outer_position = val
  2139. elif kw == 'severity': severity = val
  2140. elif kw == 'context_description' or kw == 'context':
  2141. context_description=val
  2142. else:
  2143. raise TypeError('Unknown keyword argument',kw)
  2144. if position is None and self.buf:
  2145. position = self.buf.position # Current position
  2146. err = JSONDecodeError( message, position=position, outer_position=outer_position, context_description=context_description, severity=severity, *args)
  2147. self.push_exception( err )
  2148. def update_depth_stats(self, **kwargs):
  2149. st = self.stats
  2150. st.max_depth = max(st.max_depth, self.cur_depth)
  2151. if not self._have_warned_max_depth and self.cur_depth > self.options.warn_max_depth:
  2152. self._have_warned_max_depth = True
  2153. self.push_cond( self.options.non_portable,
  2154. "Arrays or objects nested deeper than %d levels may not be portable" \
  2155. % self.options.warn_max_depth )
  2156. def update_string_stats(self, s, **kwargs):
  2157. st = self.stats
  2158. st.num_strings += 1
  2159. st.max_string_length = max(st.max_string_length, len(s))
  2160. st.total_string_length += len(s)
  2161. if self.options.warn_string_length and len(s) > self.options.warn_string_length and not self._have_warned_long_string:
  2162. self._have_warned_long_string = True
  2163. self.push_cond( self.options.non_portable,
  2164. "Strings longer than %d may not be portable" % self.options.warn_string_length,
  2165. **kwargs )
  2166. if len(s) > 0:
  2167. mincp = ord(min(s))
  2168. maxcp = ord(max(s))
  2169. if st.min_codepoint is None:
  2170. st.min_codepoint = mincp
  2171. st.max_codepoint = maxcp
  2172. else:
  2173. st.min_codepoint = min( st.min_codepoint, mincp )
  2174. st.max_codepoint = max( st.max_codepoint, maxcp )
  2175. if maxcp > 0xffff and not self._have_warned_nonbmp:
  2176. self._have_warned_nonbmp = True
  2177. self.push_cond( self.options.non_portable,
  2178. "Strings containing non-BMP characters (U+%04X) may not be portable" % maxcp,
  2179. **kwargs )
  2180. def update_negzero_int_stats(self, **kwargs):
  2181. st = self.stats
  2182. st.num_negative_zero_ints += 1
  2183. if st.num_negative_zero_ints == 1: # Only warn once
  2184. self.push_cond( self.options.non_portable,
  2185. "Negative zero (-0) integers are usually not portable",
  2186. **kwargs )
  2187. def update_negzero_float_stats(self, **kwargs):
  2188. st = self.stats
  2189. st.num_negative_zero_floats += 1
  2190. if st.num_negative_zero_floats == 1: # Only warn once
  2191. self.push_cond( self.options.non_portable,
  2192. "Negative zero (-0.0) numbers may not be portable",
  2193. **kwargs)
  2194. def update_float_stats(self, float_value, **kwargs):
  2195. st = self.stats
  2196. if 'sign' in kwargs:
  2197. del kwargs['sign']
  2198. if helpers.is_negzero( float_value ):
  2199. self.update_negzero_float_stats( **kwargs )
  2200. if helpers.is_infinite( float_value ):
  2201. st.num_infinities += 1
  2202. if isinstance(float_value, decimal.Decimal):
  2203. st.num_floats_decimal += 1
  2204. if st.num_floats_decimal == 1: # Only warn once
  2205. self.push_cond( self.options.non_portable,
  2206. "Floats larger or more precise than an IEEE \"double\" may not be portable",
  2207. **kwargs)
  2208. elif isinstance(float_value, float):
  2209. st.num_floats += 1
  2210. def update_integer_stats(self, int_value, **kwargs ):
  2211. sign=kwargs.get('sign', 1)
  2212. if 'sign' in kwargs:
  2213. del kwargs['sign']
  2214. if int_value == 0 and sign < 0:
  2215. self.update_negzero_int_stats( **kwargs )
  2216. if sign < 0:
  2217. int_value = - int_value
  2218. st = self.stats
  2219. st.num_ints += 1
  2220. if st.int8_min <= int_value <= st.int8_max:
  2221. st.num_ints_8bit += 1
  2222. elif st.int16_min <= int_value <= st.int16_max:
  2223. st.num_ints_16bit += 1
  2224. elif st.int32_min <= int_value <= st.int32_max:
  2225. st.num_ints_32bit += 1
  2226. elif st.int64_min <= int_value <= st.int64_max:
  2227. st.num_ints_64bit += 1
  2228. else:
  2229. st.num_ints_long += 1
  2230. if int_value < st.double_int_min or st.double_int_max < int_value:
  2231. st.num_ints_53bit += 1
  2232. if st.num_ints_53bit == 1: # Only warn once
  2233. self.push_cond( self.options.non_portable,
  2234. "Integers larger than 53-bits are not portable",
  2235. **kwargs )
  2236. # ----------------------------------------------------------------------
  2237. # JSON strictness options
  2238. # ----------------------------------------------------------------------
  2239. STRICTNESS_STRICT = 'strict'
  2240. STRICTNESS_WARN = 'warn'
  2241. STRICTNESS_TOLERANT = 'tolerant'
  2242. ALLOW = 'allow'
  2243. WARN = 'warn'
  2244. FORBID = 'forbid'
  2245. # For float_type option
  2246. NUMBER_AUTO = 'auto'
  2247. NUMBER_FLOAT = 'float'
  2248. NUMBER_DECIMAL = 'decimal'
  2249. # For json_int class
  2250. NUMBER_FORMAT_DECIMAL = 'decimal'
  2251. NUMBER_FORMAT_HEX = 'hex'
  2252. NUMBER_FORMAT_LEGACYOCTAL = 'legacyoctal'
  2253. NUMBER_FORMAT_OCTAL = 'octal'
  2254. NUMBER_FORMAT_BINARY = 'binary'
  2255. class _behaviors_metaclass(type):
  2256. """Meta class used to establish a set of "behavior" options.
  2257. Classes that use this meta class must defined a class-level
  2258. variable called '_behaviors' that is a list of tuples, each of
  2259. which describes one behavior and is like: (behavior_name,
  2260. documentation). Also define a second class-level variable called
  2261. '_behavior_values' which is a list of the permitted values for
  2262. each behavior, each being strings.
  2263. For each behavior (e.g., pretty), and for each value (e.g.,
  2264. yes) the following methods/properties will be created:
  2265. * pretty - value of 'pretty' behavior (read-write)
  2266. * ispretty_yes - returns True if 'pretty' is 'yes'
  2267. For each value (e.g., pink) the following methods/properties
  2268. will be created:
  2269. * all_behaviors - set of all behaviors (read-only)
  2270. * pink_behaviors - set of behaviors with value of 'pink' (read-only)
  2271. * set_all('pink')
  2272. * set_all_pink() - set all behaviors to value of 'pink'
  2273. """
  2274. def __new__(cls, clsname, bases, attrs):
  2275. values = attrs.get('_behavior_values')
  2276. attrs['values'] = property( lambda self: set(self._behavior_values), doc='Set of possible behavior values')
  2277. behaviors = attrs.get('_behaviors')
  2278. def get_behavior(self, name):
  2279. """Returns the value for a given behavior"""
  2280. try:
  2281. return getattr( self, '_behavior_'+name )
  2282. except AttributeError:
  2283. raise ValueError('Unknown behavior',name)
  2284. attrs['get_behavior'] = get_behavior
  2285. def set_behavior(self, name, value):
  2286. """Changes the value for a given behavior"""
  2287. if value not in self._behavior_values:
  2288. raise ValueError('Unknown value for behavior',value)
  2289. varname = '_behavior_'+name
  2290. if hasattr(self,varname):
  2291. setattr( self, varname, value )
  2292. else:
  2293. raise ValueError('Unknown behavior',name)
  2294. attrs['set_behavior'] = set_behavior
  2295. def describe_behavior(self,name):
  2296. """Returns documentation about a given behavior."""
  2297. for n, doc in self._behaviors:
  2298. if n==name:
  2299. return doc
  2300. else:
  2301. raise AttributeError('No such behavior',name)
  2302. attrs['describe_behavior'] = describe_behavior
  2303. for name, doc in behaviors:
  2304. attrs['_behavior_'+name] = True
  2305. for v in values:
  2306. vs = v + '_' + name
  2307. def getx(self,name=name,forval=v):
  2308. return self.get_behavior(name) == forval
  2309. attrs['is_'+v+'_'+name] = property(getx,doc=v.capitalize()+' '+doc)
  2310. # method value_name()
  2311. fnset = lambda self,_name=name,_value=v: self.set_behavior(_name,_value)
  2312. fnset.__name__ = v+'_'+name
  2313. fnset.__doc__ = 'Set behavior ' + name + ' to ' + v + "."
  2314. attrs[fnset.__name__] = fnset
  2315. def get_value_for_behavior(self,name=name):
  2316. return self.get_behavior(name)
  2317. def set_value_for_behavior(self,value,name=name):
  2318. self.set_behavior(name,value)
  2319. attrs[name] = property(get_value_for_behavior,set_value_for_behavior,doc=doc)
  2320. @property
  2321. def all_behaviors(self):
  2322. """Returns the names of all known behaviors."""
  2323. return set([t[0] for t in self._behaviors])
  2324. attrs['all_behaviors'] = all_behaviors
  2325. def set_all(self,value):
  2326. """Changes all behaviors to have the given value."""
  2327. if value not in self._behavior_values:
  2328. raise ValueError('Unknown behavior',value)
  2329. for name in self.all_behaviors:
  2330. setattr(self, '_behavior_'+name, value)
  2331. attrs['set_all'] = set_all
  2332. def is_all(self,value):
  2333. """Determines if all the behaviors have the given value."""
  2334. if value not in self._behavior_values:
  2335. raise ValueError('Unknown behavior',value)
  2336. for name in self.all_behaviors:
  2337. if getattr(self, '_behavior_'+name) != value:
  2338. return False
  2339. return True
  2340. attrs['is_all'] = is_all
  2341. for v in values:
  2342. # property value_behaviors
  2343. def getbehaviorsfor(self,value=v):
  2344. return set([name for name in self.all_behaviors if getattr(self,name)==value])
  2345. attrs[v+'_behaviors'] = property(getbehaviorsfor,doc='Return the set of behaviors with the value '+v+'.')
  2346. # method set_all_value()
  2347. setfn = lambda self,_value=v: set_all(self,_value)
  2348. setfn.__name__ = 'set_all_'+v
  2349. setfn.__doc__ = 'Set all behaviors to value ' + v + "."
  2350. attrs[setfn.__name__] = setfn
  2351. # property is_all_value
  2352. attrs['is_all_'+v] = property( lambda self,v=v: is_all(self,v), doc='Determines if all the behaviors have the value '+v+'.')
  2353. def behaviors_eq(self, other):
  2354. """Determines if two options objects are equivalent."""
  2355. if self.all_behaviors != other.all_behaviors:
  2356. return False
  2357. return self.allowed_behaviors == other.allowed_behaviors
  2358. attrs['__eq__'] = behaviors_eq
  2359. return super(_behaviors_metaclass, cls).__new__(cls, clsname, bases, attrs)
  2360. SORT_NONE = 'none'
  2361. SORT_PRESERVE = 'preserve'
  2362. SORT_ALPHA = 'alpha'
  2363. SORT_ALPHA_CI = 'alpha_ci'
  2364. SORT_SMART = 'smart'
  2365. sorting_methods = {
  2366. SORT_NONE: "Do not sort, resulting order may be random",
  2367. SORT_PRESERVE: "Preserve original order when reformatting",
  2368. SORT_ALPHA: "Sort strictly alphabetically",
  2369. SORT_ALPHA_CI: "Sort alphabetically case-insensitive",
  2370. SORT_SMART: "Sort alphabetically and numerically (DEFAULT)"
  2371. }
  2372. sorting_method_aliases = {
  2373. 'ci': SORT_ALPHA_CI
  2374. }
  2375. def smart_sort_transform( key ):
  2376. numfmt = '%012d'
  2377. digits = '0123456789'
  2378. zero = ord('0')
  2379. if not key:
  2380. key = ''
  2381. elif isinstance( key, (int,long) ):
  2382. key = numfmt % key
  2383. elif isinstance( key, basestring ):
  2384. keylen = len(key)
  2385. words = []
  2386. i=0
  2387. while i < keylen:
  2388. if key[i] in digits:
  2389. num = 0
  2390. while i < keylen and key[i] in digits:
  2391. num *= 10
  2392. num += ord(key[i]) - zero
  2393. i += 1
  2394. words.append( numfmt % num )
  2395. else:
  2396. words.append( key[i].upper() )
  2397. i += 1
  2398. key = ''.join(words)
  2399. else:
  2400. key = str(key)
  2401. return key
  2402. # Find Enum type (introduced in Python 3.4)
  2403. try:
  2404. from enum import Enum as _enum
  2405. except ImportError:
  2406. _enum = None
  2407. # Find OrderedDict type
  2408. try:
  2409. from collections import OrderedDict as _OrderedDict
  2410. except ImportError:
  2411. _OrderedDict = None
  2412. class json_options(object):
  2413. """Options to determine how strict the decoder or encoder should be."""
  2414. __metaclass__ = _behaviors_metaclass
  2415. _behavior_values = (ALLOW, WARN, FORBID)
  2416. _behaviors = (
  2417. ("all_numeric_signs",
  2418. "Numbers may be prefixed by any \'+\' and \'-\', e.g., +4, -+-+77"),
  2419. ("any_type_at_start",
  2420. "A JSON document may start with any type, not just arrays or objects"),
  2421. ("comments",
  2422. "JavaScript comments, both /*...*/ and //... styles"),
  2423. ("control_char_in_string",
  2424. "Strings may contain raw control characters without \\u-escaping"),
  2425. ("hex_numbers",
  2426. "Hexadecimal numbers, e.g., 0x1f"),
  2427. ("binary_numbers",
  2428. "Binary numbers, e.g., 0b1001"),
  2429. ("octal_numbers",
  2430. "New-style octal numbers, e.g., 0o731 (see leading-zeros for legacy octals)"),
  2431. ("initial_decimal_point",
  2432. "Floating-point numbers may start with a decimal point (no units digit)"),
  2433. ("extended_unicode_escapes",
  2434. "Extended Unicode escape sequence \\u{..} for non-BMP characters"),
  2435. ("js_string_escapes",
  2436. "All JavaScript character \\-escape sequences may be in strings"),
  2437. ("leading_zeros",
  2438. "Numbers may have extra leading zeros (see --leading-zero-radix option)"),
  2439. ("non_numbers",
  2440. "Non-numbers may be used, such as NaN or Infinity"),
  2441. ("nonescape_characters",
  2442. "Unknown character \\-escape sequences stand for that character (\\Q -> 'Q')"),
  2443. ("identifier_keys",
  2444. "JavaScript identifiers are converted to strings when used as object keys"),
  2445. ("nonstring_keys",
  2446. "Value types other than strings (or identifiers) may be used as object keys"),
  2447. ("omitted_array_elements",
  2448. "Arrays may have omitted/elided elements, e.g., [1,,3] == [1,undefined,3]"),
  2449. ("single_quoted_strings",
  2450. "Strings may be delimited with both double (\") and single (\') quotation marks"),
  2451. ("trailing_comma",
  2452. "A final comma may end the list of array or object members"),
  2453. ("trailing_decimal_point",
  2454. "Floating-point number may end with a decimal point and no following fractional digits"),
  2455. ("undefined_values",
  2456. "The JavaScript 'undefined' value may be used"),
  2457. ("format_control_chars",
  2458. "Unicode \"format control characters\" may appear in the input"),
  2459. ("unicode_whitespace",
  2460. "Treat any Unicode whitespace character as valid whitespace"),
  2461. # Never legal
  2462. ("leading_zeros",
  2463. "Numbers may have leading zeros"),
  2464. # Normally warnings
  2465. ("duplicate_keys",
  2466. "Objects may have repeated keys"),
  2467. ("zero_byte",
  2468. "Strings may contain U+0000, which may not be safe for C-based programs"),
  2469. ("bom",
  2470. "A JSON document may start with a Unicode BOM (Byte Order Mark)"),
  2471. ("non_portable",
  2472. "Anything technically valid but likely to cause data portablibity issues"),
  2473. ) # end behavior list
  2474. def reset_to_defaults(self):
  2475. # Plain attrs (other than above behaviors) are simply copied
  2476. # by value, either during initialization (via keyword
  2477. # arguments) or via the copy() method.
  2478. self._plain_attrs = ['leading_zero_radix',
  2479. 'encode_namedtuple_as_object',
  2480. 'encode_enum_as',
  2481. 'encode_compactly',
  2482. 'escape_unicode',
  2483. 'always_escape_chars',
  2484. 'warn_string_length',
  2485. 'warn_max_depth',
  2486. 'int_as_float',
  2487. 'decimal_context',
  2488. 'float_type',
  2489. 'keep_format',
  2490. 'date_format',
  2491. 'datetime_format',
  2492. 'time_format',
  2493. 'timedelta_format',
  2494. 'sort_keys',
  2495. 'indent_amount', 'indent_tab_width', 'indent_limit',
  2496. 'max_items_per_line',
  2497. 'py2str_encoding' ]
  2498. self.strictness = STRICTNESS_WARN
  2499. self._leading_zero_radix = 8 # via property: leading_zero_radix
  2500. self._sort_keys = SORT_SMART # via property: sort_keys
  2501. self.int_as_float = False
  2502. self.float_type = NUMBER_AUTO
  2503. self.decimal_context = (decimal.DefaultContext if decimal else None)
  2504. self.keep_format = False # keep track of when numbers are hex, octal, etc.
  2505. self.encode_namedtuple_as_object = True
  2506. self._encode_enum_as = 'name' # via property
  2507. self.encode_compactly = True
  2508. self.escape_unicode = False
  2509. self.always_escape_chars = None # None, or a set of Unicode characters to always escape
  2510. self.warn_string_length = 0xfffd # with 16-bit length prefix
  2511. self.warn_max_depth = 64
  2512. self.date_format = 'iso' # or strftime format
  2513. self.datetime_format = 'iso' # or strftime format
  2514. self.time_format = 'iso' # or strftime format
  2515. self.timedelta_format = 'iso' # or 'hms'
  2516. self.sort_keys = SORT_ALPHA
  2517. self.indent_amount = 2
  2518. self.indent_tab_width = 0 # 0, or number of equivalent spaces
  2519. self.indent_limit = None
  2520. self.max_items_per_line = 1 # When encoding how many items per array/object
  2521. # before breaking into multiple lines
  2522. # For interpreting Python 2 'str' types:
  2523. if _py_major == 2:
  2524. self.py2str_encoding = 'ascii'
  2525. else:
  2526. self.py2str_encoding = None
  def __init__(self, **kwargs):
      """Set JSON encoding and decoding options.

      If 'strict' is set to True, then only strictly-conforming JSON
      output will be produced.  Note that this means that some types
      of values may not be convertable and will result in a
      JSONEncodeError exception.

      If 'compactly' is set to True, then the resulting string will
      have all extraneous white space removed; if False then the
      string will be "pretty printed" with whitespace and indentation
      added to make it more readable.

      If 'escape_unicode' is set to True, then all non-ASCII characters
      will be represented as a unicode escape sequence; if False then
      the actual real unicode character will be inserted if possible.

      The 'escape_unicode' can also be a function, which when called
      with a single argument of a unicode character will return True
      if the character should be escaped or False if it should not.

      Other keywords handled here:

        * warnings -- a false value calls suppress_warnings().
        * html_safe / xml_safe -- adds '<', '>', '/' and '&' to the set
          of characters that are always escaped.
        * always_escape -- extra characters to always escape.
        * decimal / decimal_context -- 'default', 'basic', 'extended',
          a decimal.Context, or a number of significant digits.
        * allow / warn / forbid / prevent / deny -- a string (comma or
          space separated) or iterable of behavior names.
        * allow_X / warn_X / forbid_X / prevent_X / deny_X -- sets the
          single behavior X according to the truth of the value.
        * any name listed in _plain_attrs -- assigned directly.

      Raises ValueError for an unknown keyword or an invalid value.
      """
      self.reset_to_defaults()

      if 'strict' in kwargs:
          # Do this keyword first, so other keywords may override specific behaviors
          self.strictness = kwargs['strict']

      for kw, val in kwargs.items():
          if kw == 'compactly':  # alias for 'encode_compactly'
              self.encode_compactly = val
          elif kw == 'strict':
              pass  # Already handled
          elif kw == 'warnings':
              # Only a false value turns warnings off; warnings=True
              # keeps whatever the strictness level configured.
              if not val:
                  self.suppress_warnings()
          elif kw == 'html_safe' or kw == 'xml_safe':
              if bool(val):
                  if self.always_escape_chars is None:
                      self.always_escape_chars = set(u'<>/&')
                  else:
                      self.always_escape_chars.update(set(u'<>/&'))
          elif kw == 'always_escape':
              if val:
                  if self.always_escape_chars is None:
                      self.always_escape_chars = set(val)
                  else:
                      self.always_escape_chars.update(set(val))
          elif kw == 'int_as_float':
              self.int_as_float = bool(val)
          elif kw == 'keep_format':
              self.keep_format = bool(val)
          elif kw == 'float_type':
              if val in (NUMBER_AUTO, NUMBER_FLOAT, NUMBER_DECIMAL):
                  self.float_type = val
              else:
                  raise ValueError("Unknown option %r for argument %r to initialize %s" % (val,kw,self.__class__.__name__))
          elif kw == 'decimal' or kw == 'decimal_context':
              if decimal:
                  if not val or val == 'default':
                      self.decimal_context = decimal.DefaultContext
                  elif val == 'basic':
                      self.decimal_context = decimal.BasicContext
                  elif val == 'extended':
                      self.decimal_context = decimal.ExtendedContext
                  elif isinstance(val, decimal.Context):
                      self.decimal_context = val
                  elif isinstance(val, (int, long)) or \
                          (isinstance(val, basestring) and val.strip()[:1].isdigit()):
                      # isdigit must actually be called; the bare method
                      # object is always true and hid the error branch below.
                      prec = int(val)
                      self.decimal_context = decimal.Context(prec=prec)
                  else:
                      raise ValueError("Option for %r should be a decimal.Context, a number of significant digits, or one of 'default','basic', or 'extended'." % (kw,))
          elif kw in ('allow', 'warn', 'forbid', 'prevent', 'deny'):
              action = {'allow':ALLOW, 'warn':WARN, 'forbid':FORBID, 'prevent':FORBID, 'deny':FORBID}[ kw ]
              if isinstance(val, basestring):
                  val = [b.replace('-', '_') for b in val.replace(',', ' ').split()]
              for behavior in val:
                  self.set_behavior(behavior, action)
          elif kw in self._plain_attrs:
              # Checked before the prefix test below so that plain
              # attributes whose names happen to start with "warn_"
              # (warn_string_length, warn_max_depth) keep their value
              # instead of being treated as behavior switches.
              setattr(self, kw, val)
          elif kw.startswith(('allow_', 'forbid_', 'prevent_', 'deny_', 'warn_')):
              action, behavior = kw.split('_', 1)
              if action == 'allow':
                  if val:
                      self.set_behavior(behavior, ALLOW)
                  else:
                      self.set_behavior(behavior, FORBID)
              elif action in ('forbid', 'prevent', 'deny'):
                  if val:
                      self.set_behavior(behavior, FORBID)
                  else:
                      self.set_behavior(behavior, ALLOW)
              elif action == 'warn':
                  if val:
                      self.set_behavior(behavior, WARN)
                  else:
                      self.set_behavior(behavior, ALLOW)
          else:
              raise ValueError("Unknown keyword argument %r to initialize %s" % (kw,self.__class__.__name__))
  def copy(self):
      """Return a new options object of the same class, populated
      from this one by way of copy_from()."""
      clone = self.__class__()
      clone.copy_from(self)
      return clone
  def copy_from(self, other):
      """Make this options object a duplicate of 'other'.

      Copies the strictness level, then every individual behavior,
      then every attribute named in _plain_attrs.  Mutable values
      (sets and decimal contexts) are duplicated so that later changes
      to one object do not leak into the other.
      """
      if self is other:
          return  # Myself!

      self.strictness = other.strictness  # sets behaviors in bulk

      for name in self.all_behaviors:
          self.set_behavior(name, other.get_behavior(name))

      for name in self._plain_attrs:
          val = getattr(other, name)
          if isinstance(val, set):
              val = val.copy()
          elif decimal and isinstance(val, decimal.Context):
              # Contexts are mutable and have copy().  Decimal numbers
              # are immutable (and have no copy() method), so they are
              # simply shared.
              val = val.copy()
          setattr(self, name, val)
  def spaces_to_next_indent_level( self, min_spaces=1, subtract=0 ):
      """Return a run of spaces: indent_amount less 'subtract', but
      never fewer than 'min_spaces' and never a negative count."""
      width = max( min_spaces, 0, self.indent_amount - subtract )
      return ' ' * width
  def indentation_for_level( self, level=0 ):
      """Returns a whitespace string used for indenting.

      The level is capped at indent_limit (when one is set) and then
      multiplied by indent_amount.  If indent_tab_width is non-zero
      the result uses as many tabs as fit, followed by spaces.
      """
      depth = level
      if self.indent_limit is not None and level > self.indent_limit:
          depth = self.indent_limit
      width = depth * self.indent_amount
      if not self.indent_tab_width:
          return ' ' * width
      tabs, spaces = divmod( width, self.indent_tab_width )
      return '\t' * tabs + ' ' * spaces
  def set_indent( self, num_spaces, tab_width=0, limit=None ):
      """Changes the indentation properties when outputting JSON in non-compact mode.

      'num_spaces' is the number of spaces to insert for each level
      of indentation, which defaults to 2.

      'tab_width', if not 0, is the number of spaces which is equivalent
      to one tab character.  Tabs will be output where possible rather
      than runs of spaces.

      'limit', if not None, is the maximum indentation level after
      which no further indentation will be output.

      Raises ValueError if 'num_spaces' is negative.
      """
      amount = int(num_spaces)
      if amount < 0:
          raise ValueError("indentation amount can not be negative", amount)
      self.indent_amount, self.indent_tab_width, self.indent_limit = \
          amount, tab_width, limit
  @property
  def sort_keys(self):
      """The method used to sort dictionary keys when encoding JSON
      """
      return self._sort_keys

  @sort_keys.setter
  def sort_keys(self, method):
      # Accepts: any false value (no sorting), a callable, a name in
      # sorting_methods, an alias from sorting_method_aliases, or True
      # (alphabetical).  Anything else is rejected with ValueError.
      if not method:
          chosen = SORT_NONE
      elif callable(method) or method in sorting_methods:
          chosen = method
      elif method in sorting_method_aliases:  # alias
          chosen = sorting_method_aliases[method]
      elif method == True:
          chosen = SORT_ALPHA
      else:
          raise ValueError("Not a valid sorting method: %r" % method)
      self._sort_keys = chosen
  @property
  def encode_enum_as(self):
      """The strategy for encoding Python Enum values.
      """
      return self._encode_enum_as

  @encode_enum_as.setter
  def encode_enum_as(self, val):
      # Only the three known strategies are accepted.
      if val in ('name', 'qname', 'value'):
          self._encode_enum_as = val
      else:
          raise ValueError("encode_enum_as must be one of 'name','qname', or 'value'")
  @property
  def zero_float(self):
      """The numeric value 0.0, either a float or a decimal."""
      use_decimal = decimal and self.float_type == NUMBER_DECIMAL
      if not use_decimal:
          return 0.0
      return self.decimal_context.create_decimal('0.0')
  @property
  def negzero_float(self):
      """The numeric value -0.0, either a float or a decimal."""
      use_decimal = decimal and self.float_type == NUMBER_DECIMAL
      if not use_decimal:
          return -0.0
      return self.decimal_context.create_decimal('-0.0')
  @property
  def nan(self):
      """The numeric value NaN, either a float or a decimal."""
      use_decimal = decimal and self.float_type == NUMBER_DECIMAL
      if not use_decimal:
          return nan
      return self.decimal_context.create_decimal('NaN')
  @property
  def inf(self):
      """The numeric value Infinity, either a float or a decimal."""
      use_decimal = decimal and self.float_type == NUMBER_DECIMAL
      if not use_decimal:
          return inf
      return self.decimal_context.create_decimal('Infinity')
  @property
  def neginf(self):
      """The numeric value -Infinity, either a float or a decimal."""
      use_decimal = decimal and self.float_type == NUMBER_DECIMAL
      if not use_decimal:
          return neginf
      return self.decimal_context.create_decimal('-Infinity')
  def make_int( self, s, sign=None, number_format=NUMBER_FORMAT_DECIMAL ):
      """Makes an integer value according to the current options.

      First argument should be a string representation of the number,
      or an integer.

      'sign' may be '+' / '-', or a number whose sign is used.  A sign
      character at the start of a string 's' takes precedence.

      'number_format' is recorded on the result (as a json_int) when
      the keep_format option is on and the result is an integer.

      Returns a number value, which could be an int, float, or decimal.
      """
      # Normalize a numeric sign into its character form
      if isinstance(sign, (int, long)):
          if sign < 0:
              sign = '-'
          else:
              sign = '+'
      if isinstance(s, basestring):
          if s.startswith('-') or s.startswith('+'):
              sign = s[0]
              s = s[1:]

      if self.int_as_float:
          # Making a float/decimal
          if isinstance(s, (int, long)):
              if self.float_type == NUMBER_DECIMAL:
                  n = self.decimal_context.create_decimal( s )
                  if sign == '-':
                      n = n.copy_negate()
              elif s == 0 and sign == '-':
                  n = self.negzero_float
              elif -999999999999999 <= s <= 999999999999999:
                  # Small enough to convert to a float without checking
                  n = float(s)
                  if sign == '-':
                      n *= -1
              else:
                  # float() raises OverflowError for integers beyond the
                  # float range rather than returning infinity, so treat
                  # that case as "precision lost" explicitly.
                  try:
                      n = float(s)
                  except OverflowError:
                      n = inf if s > 0 else neginf
                      lossy = True
                  else:
                      lossy = (n == inf or int(n) != s)
                  if lossy and self.float_type != NUMBER_FLOAT:
                      n = self.decimal_context.create_decimal( s )
                      if sign == '-':
                          n = n.copy_negate()
                  elif sign == '-':
                      n *= -1
          else:  # not already an int
              n = self.make_float( s, sign )
              # Perturb the last digit; if the float does not change (or
              # overflowed) then a float can not hold this number exactly.
              n2 = self.make_float( s[:-1] + ('9' if s[-1]<='5' else '0'), sign )
              if (n == inf or n == n2) and self.float_type != NUMBER_FLOAT:
                  n = self.make_decimal( s, sign )
      elif isinstance( s, (int, long) ):
          # already an integer
          n = s
          if sign == '-':
              if n == 0:
                  n = self.negzero_float  # integers can not represent -0
              else:
                  n *= -1
      else:
          # Making an actual integer
          try:
              n = int( s )
          except ValueError:
              n = self.nan
          else:
              if sign == '-':
                  if n == 0:
                      n = self.negzero_float
                  else:
                      n *= -1
      if isinstance(n, (int, long)) and self.keep_format:
          n = json_int(n, number_format=number_format)
      return n
  def make_decimal( self, s, sign='+' ):
      """Converts a string into a decimal or float value.

      Defers to make_float() when the decimal module is unavailable or
      float_type is NUMBER_FLOAT.  A leading sign in 's' overrides
      'sign'; otherwise a numeric 'sign' is reduced to '+' or '-'.
      Unparsable text yields a decimal NaN and overflow yields a
      signed decimal Infinity.
      """
      if not decimal or self.float_type == NUMBER_FLOAT:
          return self.make_float( s, sign )

      if s.startswith(('-', '+')):
          sign, s = s[0], s[1:]
      elif isinstance(sign, (int, long)):
          sign = '-' if sign < 0 else '+'

      ctx = self.decimal_context
      try:
          value = ctx.create_decimal( s )
      except decimal.InvalidOperation:
          return ctx.create_decimal( 'NaN' )
      except decimal.Overflow:
          return ctx.create_decimal( '-Infinity' if sign == '-' else 'Infinity' )
      if sign == '-':
          value = value.copy_negate()
      return value
  def make_float( self, s, sign='+' ):
      """Converts a string into a float or decimal value.

      Defers to make_decimal() when the decimal module is available
      and float_type is NUMBER_DECIMAL.  A leading sign in 's'
      overrides 'sign'; otherwise a numeric 'sign' is reduced to '+'
      or '-'.  Unparsable text yields nan.
      """
      if decimal and self.float_type == NUMBER_DECIMAL:
          return self.make_decimal( s, sign )

      if s.startswith(('-', '+')):
          sign, s = s[0], s[1:]
      elif isinstance(sign, (int, long)):
          sign = '-' if sign < 0 else '+'

      try:
          value = float(s)
      except ValueError:
          return nan
      if sign == '-':
          value *= -1
      return value
  @property
  def leading_zero_radix(self):
      """The radix to be used for numbers with leading zeros.  8 or 10
      """
      return self._leading_zero_radix

  @leading_zero_radix.setter
  def leading_zero_radix(self, radix):
      # Strings may be numeric ("8", "10") or one of the words
      # octal/oct/decimal/dec in any letter case.
      if isinstance(radix, basestring):
          try:
              radix = int(radix)
          except ValueError:
              words = {'octal': 8, 'oct': 8, '8': 8, 'decimal': 10, 'dec': 10}
              lowered = radix.lower()
              radix = words.get(lowered, lowered)
      if radix not in (8, 10):
          raise ValueError("Radix must either be 8 (octal) or 10 (decimal)")
      self._leading_zero_radix = radix
  @property
  def leading_zero_radix_as_word(self):
      """The current leading-zero radix spelled out: 'octal' or 'decimal'."""
      words = {8: 'octal', 10: 'decimal'}
      return words[ self._leading_zero_radix ]
  def suppress_warnings(self):
      """Switch every behavior currently set to warn over to 'allow'."""
      # Iterate over a snapshot: set_behavior() changes which behaviors
      # are in the warn group, and the collection must not change
      # underneath the loop.
      for name in tuple(self.warn_behaviors):
          self.set_behavior(name, 'allow')
  @property
  def allow_or_warn_behaviors(self):
      """Returns the set of all behaviors that are not forbidden (i.e., are allowed or warned)."""
      allowed = self.allow_behaviors
      warned = self.warn_behaviors
      return allowed.union( warned )
  @property
  def strictness(self):
      # The strictness level most recently assigned through the setter.
      return self._strictness

  @strictness.setter
  def strictness(self, strict):
      """Changes whether the options should be re-configured for strict JSON conformance."""
      # Accepts one of the STRICTNESS_* constants; True is treated as
      # STRICTNESS_STRICT and False as STRICTNESS_TOLERANT.  Anything
      # else raises ValueError.
      #
      # Each level first sets every behavior in bulk and then overrides
      # a few individual ones, so the order of the calls matters.
      if strict == STRICTNESS_WARN:
          self._strictness = STRICTNESS_WARN
          self.set_all_warn()
      elif strict == STRICTNESS_STRICT or strict is True:
          self._strictness = STRICTNESS_STRICT
          self.keep_format = False
          self.set_all_forbid()
          # These are only warned about, even in strict mode
          self.warn_duplicate_keys()
          self.warn_zero_byte()
          self.warn_bom()
          self.warn_non_portable()
      elif strict == STRICTNESS_TOLERANT or strict is False:
          self._strictness = STRICTNESS_TOLERANT
          self.set_all_allow()
          # These still produce warnings in tolerant mode
          self.warn_duplicate_keys()
          self.warn_zero_byte()
          self.warn_leading_zeros()
          self.leading_zero_radix = 8
          self.warn_bom()
          self.allow_non_portable()
      else:
          raise ValueError("Unknown strictness options %r" % strict)
      # Applied for every valid strictness level
      self.allow_any_type_at_start()
  2901. # ----------------------------------------------------------------------
  2902. # The main JSON encoder/decoder class.
  2903. # ----------------------------------------------------------------------
  2904. class JSON(object):
  2905. """An encoder/decoder for JSON data streams.
  2906. Usually you will call the encode() or decode() methods. The other
  2907. methods are for lower-level processing.
  2908. Whether the JSON parser runs in strict mode (which enforces exact
  2909. compliance with the JSON spec) or the more forgiving non-strict mode
  2910. can be affected by setting the 'strict' argument in the object's
  2911. initialization; or by assigning True or False to the 'strict'
  2912. property of the object.
  2913. You can also adjust a finer-grained control over strictness by
  2914. allowing or forbidding specific behaviors. You can get a list of
  2915. all the available behaviors by accessing the 'behaviors' property.
  2916. Likewise the 'allowed_behaviors' and 'forbidden_behaviors' list which
  2917. behaviors will be allowed and which will not. Call the allow()
  2918. or forbid() methods to adjust these.
  2919. """
  # NOTE(review): presumably the characters that may open a string
  # literal (double and single quote) -- confirm against the decoder.
  _string_quotes = '"\''

  # Maps the letter that follows a backslash to the character it denotes.
  _escapes_json = { # character escapes in JSON
      '"': '"',
      '/': '/',
      '\\': '\\',
      'b': '\b',
      'f': '\f',
      'n': '\n',
      'r': '\r',
      't': '\t',
      }

  # Same shape as _escapes_json; differs by having the single quote,
  # \v and \0 escapes and by lacking the '/' escape.
  _escapes_js = { # character escapes in Javascript
      '"': '"',
      '\'': '\'',
      '\\': '\\',
      'b': '\b',
      'f': '\f',
      'n': '\n',
      'r': '\r',
      't': '\t',
      'v': '\v',
      '0': '\x00'
      }

  # Following is a reverse mapping of escape characters, used when we
  # output JSON.  Only those escapes which are always safe (e.g., in JSON)
  # are here.  It won't hurt if we leave questionable ones out.
  _rev_escapes = {'\n': '\\n',
                  '\t': '\\t',
                  '\b': '\\b',
                  '\r': '\\r',
                  '\f': '\\f',
                  '"': '\\"',
                  '\\': '\\\\' }
  _optional_rev_escapes = { '/': '\\/' } # only escaped if forced to do so

  # NOTE(review): looks like the characters that can appear in JSON's
  # own syntax (punctuation, digits, keyword letters, whitespace) --
  # confirm how it is used.
  json_syntax_characters = u"{}[]\"\\,:0123456789.-+abcdefghijklmnopqrstuvwxyz \t\n\r"

  # Every valid hook name.  set_hook(), has_hook() and call_hook()
  # validate against this tuple, and each hook is stored on the
  # instance in an attribute named "<hookname>_hook".
  all_hook_names = ('decode_number', 'decode_float', 'decode_object',
                    'decode_array', 'decode_string',
                    'encode_value', 'encode_dict', 'encode_dict_key',
                    'encode_sequence', 'encode_bytes', 'encode_default')
  def __init__(self, **kwargs):
      """Creates a JSON encoder/decoder object.

      You may pass encoding and decoding options either by passing
      an argument named 'json_options' with an instance of a
      json_options class; or with individual keyword/values that will
      be used to initialize a new json_options object.

      You can also set hooks by using keyword arguments using the
      hook name; e.g., encode_dict=my_hook_func.
      """
      import unicodedata

      kwargs = kwargs.copy()
      # Initialize hooks.  Hook keywords are removed from kwargs so
      # that they are not passed on to json_options; hooks not given
      # are explicitly cleared.
      for hookname in self.all_hook_names:
          self.set_hook( hookname, kwargs.pop(hookname, None) )

      # Set options
      if 'json_options' in kwargs:
          self._options = kwargs['json_options']
      else:
          self._options = json_options(**kwargs)

      # The following is a boolean map of the first 256 characters
      # which will quickly tell us which of those characters never
      # need to be escaped.
      self._asciiencodable = [
          32 <= c < 128
          and chr(c) not in self._rev_escapes
          and unicodedata.category(unichr(c)) not in ['Cc','Cf','Zl','Zp']
          for c in range(0,256)]
  @property
  def options(self):
      """The json_options instance holding this object's optional
      behaviors, such as the JSON conformance strictness.
      """
      current = self._options
      return current
  def clear_hook(self, hookname):
      """Removes the callback for one hook by setting it to None
      through set_hook()."""
      no_callback = None
      self.set_hook( hookname, no_callback )
  def clear_all_hooks(self):
      """Removes every hook callback by calling clear_hook() for each
      name in all_hook_names."""
      for name in self.all_hook_names:
          self.clear_hook( name )
  def set_hook(self, hookname, function):
      """Sets a user-defined callback function used during encoding or decoding.

      The 'hookname' argument must be a string containing the name of
      one of the available hooks, listed below.

      The 'function' argument must either be None, which disables the hook,
      or a callable function.  Hooks do not stack, if you set a hook it will
      undo any previously set hook.

      Raises ValueError if the hook name is unknown, or if 'function'
      is neither None nor callable.

      Nested values.  When decoding JSON that has nested objects or
      arrays, the decoding hooks will be called once for every
      corresponding value, even if nested.  Generally the decoding
      hooks will be called from the inner-most value outward, and
      then left to right.

      Skipping. Any hook function may raise a JSONSkipHook exception
      if it does not wish to handle the particular invocation.  This
      will have the effect of skipping the hook for that particular
      value, as if the hook was not set.

      AVAILABLE HOOKS:

      * decode_string
          Called for every JSON string literal with the
          Python-equivalent string value as an argument. Expects to
          get a Python object in return.

      * decode_float:
          Called for every JSON number that looks like a float (has
          a ".").  The string representation of the number is passed
          as an argument.  Expects to get a Python object in return.

      * decode_number:
          Called for every JSON number. The string representation of
          the number is passed as an argument.  Expects to get a
          Python object in return.  NOTE: If the number looks like a
          float and the 'decode_float' hook is set, then this hook
          will not be called.

      * decode_array:
          Called for every JSON array. A Python list is passed as
          the argument, and expects to get a Python object back.
          NOTE: this hook will get called for every array, even
          for nested arrays.

      * decode_object:
          Called for every JSON object.  A Python dictionary is passed
          as the argument, and expects to get a Python object back.
          NOTE: this hook will get called for every object, even
          for nested objects.

      * encode_value:
          Called for every Python object which is to be encoded into JSON.

      * encode_dict:
          Called for every Python dictionary or anything that looks
          like a dictionary.

      * encode_dict_key:
          Called for every dictionary key.

      * encode_sequence:
          Called for every Python sequence-like object that is not a
          dictionary or string. This includes lists and tuples.

      * encode_bytes:
          Called for every Python bytes or bytearray type; or for
          any memoryview with a byte ('B') item type.  (Python 3 only)

      * encode_default:
          Called for any Python type which can not otherwise be converted
          into JSON, even after applying any other encoding hooks.

      """
      if hookname not in self.all_hook_names:
          raise ValueError("Unknown hook name %r" % hookname)
      # Identity test: "!= None" would invoke any custom comparison
      # defined by the object being installed.
      if function is not None and not callable(function):
          raise ValueError("Hook %r must be None or a callable function" % hookname)
      # Each hook lives in an instance attribute named "<hookname>_hook"
      setattr( self, hookname + '_hook', function )
  def has_hook(self, hook_name):
      """Returns True only when 'hook_name' is a known hook name and a
      callable is currently installed for it."""
      if hook_name and hook_name in self.all_hook_names:
          return callable( getattr( self, hook_name + '_hook' ) )
      return False
  def call_hook(self, hook_name, input_object, position=None, *args, **kwargs):
      """Wrapper function to invoke a user-supplied hook function.

      This will capture any exceptions raised by the hook and do something
      appropriate with it.

      The hook is called with 'input_object' followed by any extra
      positional and keyword arguments; its return value is returned.

      Raises AttributeError for an unknown hook name and TypeError if
      no callable is installed for it.  A JSONSkipHook raised by the
      hook is passed through unchanged.  Any other exception is
      wrapped in a JSONEncodeHookError (for "encode_" hooks) or a
      JSONDecodeHookError, with 'position' attached and the original
      exception kept as its cause.
      """
      import sys
      if hook_name not in self.all_hook_names:
          raise AttributeError("No such hook %r" % hook_name)
      hook = getattr( self, hook_name + '_hook' )
      if not callable(hook):
          raise TypeError("Hook is not callable: %r" % (hook,))
      try:
          rval = hook( input_object, *args, **kwargs )
      except JSONSkipHook:
          raise  # Do nothing
      except Exception as err:
          exc_info = sys.exc_info()
          if hook_name.startswith('encode_'):
              ex_class = JSONEncodeHookError
          else:
              ex_class = JSONDecodeHookError

          if isinstance(err, JSONStopProcessing):
              severity = 'fatal'
          else:
              severity = 'error'

          newerr = ex_class( hook_name, exc_info, input_object, *args, position=position, severity=severity )

          # Simulate Python 3's: "raise X from Y" exception chaining
          newerr.__cause__ = err
          newerr.__traceback__ = exc_info[2]
          raise newerr
      return rval
  def isws(self, c):
      """Determines if the given character is considered as white space.

      With the unicode_whitespace option off only space, tab, newline
      and carriage return count.  With it on, form feed, vertical tab
      and any character in Unicode category Zs count as well.

      Note that Javascript is much more permissive on what it considers
      to be whitespace than does JSON.

      Ref. ECMAScript section 7.2

      """
      if self.options.unicode_whitespace:
          if not isinstance(c, unicode):
              c = unicode(c)
          if c in u' \t\n\r\f\v':
              return True
          import unicodedata
          return unicodedata.category(c) == 'Zs'
      return c in ' \t\n\r'
  def islineterm(self, c):
      """Determines if the given character is considered a line terminator.

      The terminators are carriage return, line feed, U+2028 and U+2029.

      Ref. ECMAScript section 7.3

      """
      # U+2028 / U+2029 are the characters in categories Zl and Zp
      return c in ('\r', '\n', u'\u2028', u'\u2029')
  def recover_parser(self, state):
      """Try to recover after a syntax error by locating the next "known" position.

      Skips ahead to the next punctuation, quote or end-of-line
      character, then past any whitespace, and records an informational
      message on 'state'.  Returns the character the skip stopped at,
      as peeked before the whitespace was skipped.
      """
      buf = state.buf

      def at_sync_point(c):
          return c in ",:[]{}\"\';" or helpers.char_is_unicode_eol(c)

      buf.skipuntil( at_sync_point )
      stopchar = buf.peek()
      self.skipws(state)
      if not buf.at_end:
          state.push_info("Recovering parsing after character %r" % stopchar, position=buf.position)
      else:
          state.push_info("Could not recover parsing after previous error",position=buf.position)
      return stopchar
  def decode_null(self, state):
      """Intermediate-level decoder for ECMAScript 'null' keyword.

      Pops the next identifier from state.buf.  If it is 'null' the
      null counter in state.stats is incremented; otherwise an error
      is pushed onto 'state' at the position where the identifier
      started.  Always returns None.

      """
      buf = state.buf
      start_position = buf.position
      kw = buf.pop_identifier()
      if kw == 'null':
          state.stats.num_nulls += 1
      else:
          state.push_error("Expected a 'null' keyword", kw, position=start_position)
      return None
  def encode_undefined(self, state):
      """Appends the ECMAScript 'undefined' keyword to the output state."""
      keyword = 'undefined'
      state.append( keyword )
  def encode_null(self, state):
      """Appends the JSON 'null' keyword to the output state."""
      keyword = 'null'
      state.append( keyword )
  def decode_boolean(self, state):
      """Intermediate-level decode for JSON boolean literals.

      Pops the next identifier from state.buf.  If it is 'true' or
      'false' the boolean counter in state.stats is incremented;
      otherwise an error is pushed onto 'state' at the position where
      the identifier started.

      Returns True only for the keyword 'true'; False for 'false' and
      for anything unrecognized.

      """
      buf = state.buf
      start_position = buf.position
      kw = buf.pop_identifier()
      if kw in ('true', 'false'):
          state.stats.num_bools += 1
      else:
          state.push_error("Expected a 'true' or 'false' keyword", kw, position=start_position)
      return (kw == 'true')
  def encode_boolean(self, bval, state):
      """Appends 'true' or 'false' to the output state according to
      the truth value of 'bval'."""
      if bool(bval):
          literal = 'true'
      else:
          literal = 'false'
      state.append( literal )
  3174. def decode_number(self, state):
  3175. """Intermediate-level decoder for JSON numeric literals.
  3176. Takes a string and a starting index, and returns a Python
  3177. suitable numeric type and the index of the next unparsed character.
  3178. The returned numeric type can be either of a Python int,
  3179. long, or float. In addition some special non-numbers may
  3180. also be returned such as nan, inf, and neginf (technically
  3181. which are Python floats, but have no numeric value.)
  3182. Ref. ECMAScript section 8.5.
  3183. """
  3184. buf = state.buf
  3185. self.skipws(state)
  3186. start_position = buf.position
  3187. # Use external number parser hook if available
  3188. if self.has_hook('decode_number') or self.has_hook('decode_float'):
  3189. c = buf.peek()
  3190. if c and c in '-+0123456789.': # First chars for a number-like value
  3191. buf.save_position()
  3192. nbr = buf.pop_while_in( '-+0123456789abcdefABCDEF' 'NaN' 'Infinity.' )
  3193. if '.' in nbr and self.has_hook('decode_float'):
  3194. hook_name = 'decode_float'
  3195. elif self.has_hook('decode_number'):
  3196. hook_name = 'decode_number'
  3197. else:
  3198. hook_name = None
  3199. if hook_name:
  3200. try:
  3201. val = self.call_hook( hook_name, nbr, position=start_position )
  3202. except JSONSkipHook:
  3203. pass
  3204. except JSONError, err:
  3205. state.push_exception(err)
  3206. val = undefined
  3207. else:
  3208. buf.clear_saved_position()
  3209. return val
  3210. # Hook didn't handle it, restore old position
  3211. buf.restore_position()
  3212. # Detect initial sign character(s)
  3213. sign = +1
  3214. sign_count = 0
  3215. sign_saw_plus = False
  3216. sign_saw_ws = False
  3217. c = buf.peek()
  3218. while c and c in '+-':
  3219. if c == '-':
  3220. sign = sign * -1
  3221. elif c == '+':
  3222. sign_saw_plus = True
  3223. sign_count += 1
  3224. buf.skip()
  3225. if self.skipws_nocomments(state) > 0:
  3226. sign_saw_ws = True
  3227. c = buf.peek()
  3228. if sign_count > 1 or sign_saw_plus:
  3229. state.push_cond( self.options.all_numeric_signs,
  3230. 'Numbers may only have a single "-" as a sign prefix',
  3231. position=start_position)
  3232. if sign_saw_ws:
  3233. state.push_error('Spaces may not appear between a +/- number sign and the digits', position=start_position)
  3234. # Check for ECMAScript symbolic non-numbers
  3235. if not c:
  3236. state.push_error('Missing numeric value after sign', position=start_position)
  3237. self.recover_parser(state)
  3238. self.stats.num_undefineds += 1
  3239. return undefined
  3240. elif c.isalpha() or c in '_$':
  3241. kw = buf.popwhile( lambda c: c.isalnum() or c in '_$' )
  3242. if kw == 'NaN':
  3243. state.push_cond( self.options.non_numbers,
  3244. 'NaN literals are not allowed in strict JSON',
  3245. position=start_position)
  3246. state.stats.num_nans += 1
  3247. return self.options.nan
  3248. elif kw == 'Infinity':
  3249. state.push_cond( self.options.non_numbers,
  3250. 'Infinity literals are not allowed in strict JSON',
  3251. position=start_position)
  3252. state.stats.num_infinities += 1
  3253. if sign < 0:
  3254. return self.options.neginf
  3255. else:
  3256. return self.options.inf
  3257. else:
  3258. state.push_error('Unknown numeric value keyword', kw, position=start_position)
  3259. return undefined
  3260. # Check for radix-prefixed numbers
  3261. elif c == '0' and (buf.peek(1) in [u'x',u'X']):
  3262. # ----- HEX NUMBERS 0x123
  3263. prefix = buf.popstr(2)
  3264. digits = buf.popwhile( helpers.is_hex_digit )
  3265. state.push_cond( self.options.hex_numbers,
  3266. 'Hexadecimal literals are not allowed in strict JSON', prefix+digits,
  3267. position=start_position )
  3268. if len(digits)==0:
  3269. state.push_error('Hexadecimal number is invalid', position=start_position)
  3270. self.recover_parser(state)
  3271. return undefined
  3272. ival = helpers.decode_hex( digits )
  3273. state.update_integer_stats( ival, sign=sign, position=start_position )
  3274. n = state.options.make_int( ival, sign, number_format=NUMBER_FORMAT_HEX )
  3275. return n
  3276. elif c == '0' and (buf.peek(1) in [u'o','O']):
  3277. # ----- NEW-STYLE OCTAL NUMBERS 0o123
  3278. prefix = buf.popstr(2)
  3279. digits = buf.popwhile( helpers.is_octal_digit )
  3280. state.push_cond( self.options.octal_numbers,
  3281. "Octal literals are not allowed in strict JSON", prefix+digits,
  3282. position=start_position )
  3283. if len(digits)==0:
  3284. state.push_error("Octal number is invalid", position=start_position)
  3285. self.recover_parser(state)
  3286. return undefined
  3287. ival = helpers.decode_octal( digits )
  3288. state.update_integer_stats( ival, sign=sign, position=start_position )
  3289. n = state.options.make_int( ival, sign, number_format=NUMBER_FORMAT_OCTAL )
  3290. return n
  3291. elif c == '0' and (buf.peek(1) in [u'b','B']):
  3292. # ----- NEW-STYLE BINARY NUMBERS 0b1101
  3293. prefix = buf.popstr(2)
  3294. digits = buf.popwhile( helpers.is_binary_digit )
  3295. state.push_cond( self.options.binary_numbers,
  3296. "Binary literals are not allowed in strict JSON", prefix+digits,
  3297. position=start_position )
  3298. if len(digits)==0:
  3299. state.push_error("Binary number is invalid", position=start_position)
  3300. self.recover_parser(state)
  3301. return undefined
  3302. ival = helpers.decode_binary( digits )
  3303. state.update_integer_stats( ival, sign=sign, position=start_position )
  3304. n = state.options.make_int( ival, sign, number_format=NUMBER_FORMAT_BINARY )
  3305. return n
  3306. else:
  3307. # ----- DECIMAL OR LEGACY-OCTAL NUMBER. 123, 0123
  3308. # General syntax is: \d+[\.\d+][e[+-]?\d+]
  3309. number = buf.popwhile( lambda c: c in '0123456789.+-eE' )
  3310. imax = len(number)
  3311. if imax == 0:
  3312. state.push_error('Missing numeric value', position=start_position)
  3313. has_leading_zero = False
  3314. units_digits = [] # digits making up whole number portion
  3315. fraction_digits = [] # digits making up fractional portion
  3316. exponent_digits = [] # digits making up exponent portion (excluding sign)
  3317. esign = '+' # sign of exponent
  3318. sigdigits = 0 # number of significant digits (approximate)
  3319. saw_decimal_point = False
  3320. saw_exponent = False
  3321. # Break number into parts in a first pass...use a mini state machine
  3322. in_part = 'units'
  3323. for i, c in enumerate(number):
  3324. if c == '.':
  3325. if in_part != 'units':
  3326. state.push_error('Bad number', number, position=start_position)
  3327. self.recover_parser(state)
  3328. return undefined
  3329. in_part = 'fraction'
  3330. saw_decimal_point = True
  3331. elif c in 'eE':
  3332. if in_part == 'exponent':
  3333. state.push_error('Bad number', number, position=start_position)
  3334. self.recover_parser(state)
  3335. return undefined
  3336. in_part = 'exponent'
  3337. saw_exponent = True
  3338. elif c in '+-':
  3339. if in_part != 'exponent' or exponent_digits:
  3340. state.push_error('Bad number', number, position=start_position)
  3341. self.recover_parser(state)
  3342. return undefined
  3343. esign = c
  3344. else: #digit
  3345. if in_part == 'units':
  3346. units_digits.append( c )
  3347. elif in_part == 'fraction':
  3348. fraction_digits.append( c )
  3349. elif in_part == 'exponent':
  3350. exponent_digits.append( c )
  3351. units_s = ''.join(units_digits)
  3352. fraction_s = ''.join(fraction_digits)
  3353. exponent_s = ''.join(exponent_digits)
  3354. # Basic syntax rules checking
  3355. is_integer = not (saw_decimal_point or saw_exponent)
  3356. if not units_s and not fraction_s:
  3357. state.push_error('Bad number', number, position=start_position)
  3358. self.recover_parser(state)
  3359. return undefined
  3360. if saw_decimal_point and not fraction_s:
  3361. state.push_cond( self.options.trailing_decimal_point,
  3362. 'Bad number, decimal point must be followed by at least one digit',
  3363. number, position=start_position)
  3364. fraction_s = '0'
  3365. if saw_exponent and not exponent_s:
  3366. state.push_error('Bad number, exponent is missing', number, position=start_position)
  3367. self.recover_parser(state)
  3368. return undefined
  3369. if not units_s:
  3370. state.push_cond( self.options.initial_decimal_point,
  3371. 'Bad number, decimal point must be preceded by at least one digit',
  3372. number, position=start_position)
  3373. units = '0'
  3374. elif len(units_s) > 1 and units_s[0] == '0':
  3375. has_leading_zero = True
  3376. if self.options.is_forbid_leading_zeros:
  3377. state.push_cond( self.options.leading_zeros,
  3378. 'Numbers may not have extra leading zeros',
  3379. number, position=start_position)
  3380. elif self.options.is_warn_leading_zeros:
  3381. state.push_cond( self.options.leading_zeros,
  3382. 'Numbers may not have leading zeros; interpreting as %s' \
  3383. % self.options.leading_zero_radix_as_word,
  3384. number, position=start_position)
  3385. # Estimate number of significant digits
  3386. sigdigits = len( (units_s + fraction_s).replace('0',' ').strip() )
  3387. # Handle legacy octal integers.
  3388. if has_leading_zero and is_integer and self.options.leading_zero_radix == 8:
  3389. # ----- LEGACY-OCTAL 0123
  3390. try:
  3391. ival = helpers.decode_octal( units_s )
  3392. except ValueError:
  3393. state.push_error('Bad number, not a valid octal value', number, position=start_position)
  3394. self.recover_parser(state)
  3395. return self.options.nan # undefined
  3396. state.update_integer_stats( ival, sign=sign, position=start_position )
  3397. n = state.options.make_int( ival, sign, number_format=NUMBER_FORMAT_LEGACYOCTAL )
  3398. return n
  3399. # Determine the exponential part
  3400. if exponent_s:
  3401. try:
  3402. exponent = int(exponent_s)
  3403. except ValueError:
  3404. state.push_error('Bad number, bad exponent', number, position=start_position)
  3405. self.recover_parser(state)
  3406. return undefined
  3407. if esign == '-':
  3408. exponent = - exponent
  3409. else:
  3410. exponent = 0
  3411. # Try to make an int/long first.
  3412. if not saw_decimal_point and exponent >= 0:
  3413. # ----- A DECIMAL INTEGER
  3414. ival = int(units_s)
  3415. if exponent != 0:
  3416. ival *= 10**exponent
  3417. state.update_integer_stats( ival, sign=sign, position=start_position )
  3418. n = state.options.make_int( ival, sign )
  3419. else:
  3420. # ----- A FLOATING-POINT NUMBER
  3421. try:
  3422. if exponent < float_minexp or exponent > float_maxexp or sigdigits > float_sigdigits:
  3423. n = state.options.make_decimal( number, sign )
  3424. else:
  3425. n = state.options.make_float( number, sign )
  3426. except ValueError as err:
  3427. state.push_error('Bad number, %s' % err.message, number, position=start_position)
  3428. n = undefined
  3429. else:
  3430. state.update_float_stats( n, sign=sign, position=start_position )
  3431. return n
  3432. def encode_number(self, n, state):
  3433. """Encodes a Python numeric type into a JSON numeric literal.
  3434. The special non-numeric values of float('nan'), float('inf')
  3435. and float('-inf') are translated into appropriate JSON
  3436. literals.
  3437. Note that Python complex types are not handled, as there is no
  3438. ECMAScript equivalent type.
  3439. """
  3440. if isinstance(n, complex):
  3441. if n.imag:
  3442. raise JSONEncodeError('Can not encode a complex number that has a non-zero imaginary part',n)
  3443. n = n.real
  3444. if isinstance(n, json_int):
  3445. state.append( n.json_format() )
  3446. return
  3447. if isinstance(n, (int,long)):
  3448. state.append( str(n) )
  3449. return
  3450. if decimal and isinstance(n, decimal.Decimal):
  3451. if n.is_nan(): # Could be 'NaN' or 'sNaN'
  3452. state.append( 'NaN' )
  3453. elif n.is_infinite():
  3454. if n.is_signed():
  3455. state.append( '-Infinity' )
  3456. else:
  3457. state.append( 'Infinity' )
  3458. else:
  3459. s = str(n).lower()
  3460. if 'e' not in s and '.' not in s:
  3461. s = s + '.0'
  3462. state.append( s )
  3463. return
  3464. global nan, inf, neginf
  3465. if n is nan:
  3466. state.append( 'NaN' )
  3467. elif n is inf:
  3468. state.append( 'Infinity' )
  3469. elif n is neginf:
  3470. state.append( '-Infinity' )
  3471. elif isinstance(n, float):
  3472. # Check for non-numbers.
  3473. # In python nan == inf == -inf, so must use repr() to distinguish
  3474. reprn = repr(n).lower()
  3475. if ('inf' in reprn and '-' in reprn) or n == neginf:
  3476. state.append( '-Infinity' )
  3477. elif 'inf' in reprn or n is inf:
  3478. state.append( 'Infinity' )
  3479. elif 'nan' in reprn or n is nan:
  3480. state.append( 'NaN' )
  3481. else:
  3482. # A normal float.
  3483. state.append( repr(n) )
  3484. else:
  3485. raise TypeError('encode_number expected an integral, float, or decimal number type',type(n))
  3486. def decode_string(self, state):
  3487. """Intermediate-level decoder for JSON string literals.
  3488. Takes a string and a starting index, and returns a Python
  3489. string (or unicode string) and the index of the next unparsed
  3490. character.
  3491. """
  3492. buf = state.buf
  3493. self.skipws(state)
  3494. quote = buf.peek()
  3495. if quote == '"':
  3496. pass
  3497. elif quote == "'":
  3498. state.push_cond( self.options.single_quoted_strings,
  3499. 'String literals must use double quotation marks in strict JSON' )
  3500. else:
  3501. state.push_error('String literal must be properly quoted')
  3502. return undefined
  3503. string_position = buf.position
  3504. buf.skip()
  3505. if self.options.is_forbid_js_string_escapes:
  3506. escapes = self._escapes_json
  3507. else:
  3508. escapes = self._escapes_js
  3509. ccallowed = not self.options.is_forbid_control_char_in_string
  3510. chunks = []
  3511. _append = chunks.append
  3512. # Used to track the last seen high-surrogate character
  3513. high_surrogate = None
  3514. highsur_position = None
  3515. # Used to track if errors occured so we don't keep reporting multiples
  3516. had_lineterm_error = False
  3517. # Start looping character by character until the final quotation mark
  3518. saw_final_quote = False
  3519. should_stop = False
  3520. while not saw_final_quote and not should_stop:
  3521. if buf.at_end:
  3522. state.push_error("String literal is not terminated",
  3523. outer_position=string_position, context='String')
  3524. break
  3525. c = buf.peek()
  3526. # Make sure a high surrogate is immediately followed by a low surrogate
  3527. if high_surrogate:
  3528. if 0xdc00 <= ord(c) <= 0xdfff:
  3529. low_surrogate = buf.pop()
  3530. try:
  3531. uc = helpers.surrogate_pair_as_unicode( high_surrogate, low_surrogate )
  3532. except ValueError as err:
  3533. state.push_error( 'Illegal Unicode surrogate pair', (high_surrogate, low_surrogate),
  3534. position=highsur_position, outer_position=string_position,
  3535. context='String')
  3536. should_stop = state.should_stop
  3537. uc = u'\ufffd' # replacement char
  3538. _append( uc )
  3539. high_surrogate = None
  3540. highsur_position = None
  3541. continue # ==== NEXT CHAR
  3542. elif buf.peekstr(2) != '\\u':
  3543. state.push_error('High unicode surrogate must be followed by a low surrogate',
  3544. position=highsur_position, outer_position=string_position,
  3545. context='String')
  3546. should_stop = state.should_stop
  3547. _append( u'\ufffd' ) # replacement char
  3548. high_surrogate = None
  3549. highsur_position = None
  3550. if c == quote:
  3551. buf.skip() # skip over closing quote
  3552. saw_final_quote = True
  3553. break
  3554. elif c == '\\':
  3555. # Escaped character
  3556. escape_position = buf.position
  3557. buf.skip() # skip over backslash
  3558. c = buf.peek()
  3559. if not c:
  3560. state.push_error('Escape in string literal is incomplete', position=escape_position,
  3561. outer_position=string_position, context='String')
  3562. should_stop = state.should_stop
  3563. break
  3564. elif helpers.is_octal_digit(c):
  3565. # Handle octal escape codes first so special \0 doesn't kick in yet.
  3566. # Follow Annex B.1.2 of ECMAScript standard.
  3567. if '0' <= c <= '3':
  3568. maxdigits = 3
  3569. else:
  3570. maxdigits = 2
  3571. digits = buf.popwhile( helpers.is_octal_digit, maxchars=maxdigits )
  3572. n = helpers.decode_octal(digits)
  3573. if n == 0:
  3574. state.push_cond( self.options.zero_byte,
  3575. 'Zero-byte character (U+0000) in string may not be universally safe',
  3576. "\\"+digits, position=escape_position, outer_position=string_position,
  3577. context='String')
  3578. else: # n != 0
  3579. state.push_cond( self.options.octal_numbers,
  3580. "JSON does not allow octal character escapes other than \"\\0\"",
  3581. "\\"+digits, position=escape_position, outer_position=string_position,
  3582. context='String')
  3583. should_stop = state.should_stop
  3584. if n < 128:
  3585. _append( chr(n) )
  3586. else:
  3587. _append( helpers.safe_unichr(n) )
  3588. elif escapes.has_key(c):
  3589. buf.skip()
  3590. _append( escapes[c] )
  3591. elif c == 'u' or c == 'x':
  3592. buf.skip()
  3593. esc_opener = '\\' + c
  3594. esc_closer = ''
  3595. if c == 'u':
  3596. if buf.peek() == '{':
  3597. buf.skip()
  3598. esc_opener += '{'
  3599. esc_closer = '}'
  3600. maxdigits = None
  3601. state.push_cond( self.options.extended_unicode_escapes,
  3602. "JSON strings do not allow \\u{...} escapes",
  3603. position=escape_position, outer_position=string_position,
  3604. context='String')
  3605. else:
  3606. maxdigits = 4
  3607. else: # c== 'x'
  3608. state.push_cond( self.options.js_string_escapes,
  3609. "JSON strings may not use the \\x hex-escape",
  3610. position=escape_position, outer_position=string_position,
  3611. context='String')
  3612. should_stop = state.should_stop
  3613. maxdigits = 2
  3614. digits = buf.popwhile( helpers.is_hex_digit, maxchars=maxdigits )
  3615. if esc_closer:
  3616. if buf.peek() != esc_closer:
  3617. state.push_error( "Unicode escape sequence is missing closing \'%s\'" % esc_closer, esc_opener+digits,
  3618. position=escape_position, outer_position=string_position,
  3619. context='String')
  3620. should_stop = state.should_stop
  3621. else:
  3622. buf.skip()
  3623. esc_sequence = esc_opener + digits + esc_closer
  3624. if not digits:
  3625. state.push_error('numeric character escape sequence is truncated', esc_sequence,
  3626. position=escape_position, outer_position=string_position,
  3627. context='String')
  3628. should_stop = state.should_stop
  3629. codepoint = 0xfffd # replacement char
  3630. else:
  3631. if maxdigits and len(digits) != maxdigits:
  3632. state.push_error('escape sequence has too few hexadecimal digits', esc_sequence,
  3633. position=escape_position, outer_position=string_position,
  3634. context='String')
  3635. codepoint = helpers.decode_hex( digits )
  3636. if codepoint > 0x10FFFF:
  3637. state.push_error( 'Unicode codepoint is beyond U+10FFFF', esc_opener+digits+esc_closer,
  3638. position=escape_position, outer_position=string_position,
  3639. context='String')
  3640. codepoint = 0xfffd # replacement char
  3641. if high_surrogate:
  3642. # Decode surrogate pair and clear high surrogate
  3643. low_surrogate = unichr(codepoint)
  3644. try:
  3645. uc = helpers.surrogate_pair_as_unicode( high_surrogate, low_surrogate )
  3646. except ValueError as err:
  3647. state.push_error( 'Illegal Unicode surrogate pair', (high_surrogate, low_surrogate), position=highsur_position,
  3648. outer_position=string_position,
  3649. context='String')
  3650. should_stop = state.should_stop
  3651. uc = u'\ufffd' # replacement char
  3652. _append( uc )
  3653. high_surrogate = None
  3654. highsur_position = None
  3655. elif codepoint < 128:
  3656. # ASCII chars always go in as a str
  3657. if codepoint==0:
  3658. state.push_cond( self.options.zero_byte,
  3659. 'Zero-byte character (U+0000) in string may not be universally safe',
  3660. position=escape_position, outer_position=string_position,
  3661. context='String')
  3662. should_stop = state.should_stop
  3663. _append( chr(codepoint) )
  3664. elif 0xd800 <= codepoint <= 0xdbff: # high surrogate
  3665. high_surrogate = unichr(codepoint) # remember until we get to the low surrogate
  3666. highsur_position = escape_position.copy()
  3667. elif 0xdc00 <= codepoint <= 0xdfff: # low surrogate
  3668. state.push_error('Low unicode surrogate must be proceeded by a high surrogate', position=escape_position,
  3669. outer_position=string_position,
  3670. context='String')
  3671. should_stop = state.should_stop
  3672. _append( u'\ufffd' ) # replacement char
  3673. else:
  3674. # Other chars go in as a unicode char
  3675. _append( helpers.safe_unichr(codepoint) )
  3676. else:
  3677. # Unknown escape sequence
  3678. state.push_cond( self.options.nonescape_characters,
  3679. 'String escape code is not allowed in strict JSON',
  3680. '\\'+c, position=escape_position, outer_position=string_position,
  3681. context='String')
  3682. should_stop = state.should_stop
  3683. _append( c )
  3684. buf.skip()
  3685. elif ord(c) <= 0x1f: # A control character
  3686. if ord(c) == 0:
  3687. state.push_cond( self.options.zero_byte,
  3688. 'Zero-byte character (U+0000) in string may not be universally safe',
  3689. position=buf.position, outer_position=string_position,
  3690. context='String')
  3691. should_stop = state.should_stop
  3692. if self.islineterm(c):
  3693. if not had_lineterm_error:
  3694. state.push_error('Line terminator characters must be escaped inside string literals',
  3695. 'U+%04X'%ord(c),
  3696. position=buf.position, outer_position=string_position,
  3697. context='String')
  3698. should_stop = state.should_stop
  3699. had_lineterm_error = True
  3700. _append( c )
  3701. buf.skip()
  3702. elif ccallowed:
  3703. _append( c )
  3704. buf.skip()
  3705. else:
  3706. state.push_error('Control characters must be escaped inside JSON string literals',
  3707. 'U+%04X'%ord(c),
  3708. position=buf.position, outer_position=string_position,
  3709. context='String')
  3710. should_stop = state.should_stop
  3711. buf.skip()
  3712. elif 0xd800 <= ord(c) <= 0xdbff: # a raw high surrogate
  3713. high_surrogate = buf.pop() # remember until we get to the low surrogate
  3714. highsur_position = buf.position.copy()
  3715. else: # A normal character; not an escape sequence or end-quote.
  3716. # Find a whole sequence of "safe" characters so we can append them
  3717. # all at once rather than one a time, for speed.
  3718. chunk = buf.popwhile( lambda c: c not in helpers.unsafe_string_chars and c != quote )
  3719. if not chunk:
  3720. _append( c )
  3721. buf.skip()
  3722. else:
  3723. _append( chunk )
  3724. # Check proper string termination
  3725. if high_surrogate:
  3726. state.push_error('High unicode surrogate must be followed by a low surrogate',
  3727. position=highsur_position, outer_position=string_position,
  3728. context='String')
  3729. _append( u'\ufffd' ) # replacement char
  3730. high_surrogate = None
  3731. highsur_position = None
  3732. if not saw_final_quote:
  3733. state.push_error('String literal is not terminated with a quotation mark', position=buf.position,
  3734. outer_position=string_position,
  3735. context='String')
  3736. if state.should_stop:
  3737. return undefined
  3738. # Compose the python string and update stats
  3739. s = ''.join( chunks )
  3740. state.update_string_stats( s, position=string_position )
  3741. # Call string hook
  3742. if self.has_hook('decode_string'):
  3743. try:
  3744. s = self.call_hook( 'decode_string', s, position=string_position )
  3745. except JSONSkipHook:
  3746. pass
  3747. except JSONError, err:
  3748. state.push_exception(err)
  3749. s = undefined
  3750. return s
  3751. def encode_string(self, s, state):
  3752. """Encodes a Python string into a JSON string literal.
  3753. """
  3754. # Must handle instances of UserString specially in order to be
  3755. # able to use ord() on it's simulated "characters". Also
  3756. # convert Python2 'str' types to unicode strings first.
  3757. import unicodedata, sys
  3758. import UserString
  3759. py2strenc = self.options.py2str_encoding
  3760. if isinstance(s, UserString.UserString):
  3761. def tochar(c):
  3762. c2 = c.data
  3763. if py2strenc and not isinstance(c2,unicode):
  3764. return c2.decode( py2strenc )
  3765. else:
  3766. return c2
  3767. elif py2strenc and not isinstance(s,unicode):
  3768. s = s.decode( py2strenc )
  3769. tochar = None
  3770. else:
  3771. # Could use "lambda c:c", but that is too slow. So we set to None
  3772. # and use an explicit if test inside the loop.
  3773. tochar = None
  3774. chunks = []
  3775. chunks.append('"')
  3776. revesc = self._rev_escapes
  3777. optrevesc = self._optional_rev_escapes
  3778. asciiencodable = self._asciiencodable
  3779. always_escape = state.options.always_escape_chars
  3780. encunicode = state.escape_unicode_test
  3781. i = 0
  3782. imax = len(s)
  3783. while i < imax:
  3784. if tochar:
  3785. c = tochar(s[i])
  3786. else:
  3787. c = s[i]
  3788. cord = ord(c)
  3789. if cord < 256 and asciiencodable[cord] and isinstance(encunicode, bool) \
  3790. and not (always_escape and c in always_escape):
  3791. # Contiguous runs of plain old printable ASCII can be copied
  3792. # directly to the JSON output without worry (unless the user
  3793. # has supplied a custom is-encodable function).
  3794. j = i
  3795. i += 1
  3796. while i < imax:
  3797. if tochar:
  3798. c = tochar(s[i])
  3799. else:
  3800. c = s[i]
  3801. cord = ord(c)
  3802. if cord < 256 and asciiencodable[cord] \
  3803. and not (always_escape and c in always_escape):
  3804. i += 1
  3805. else:
  3806. break
  3807. chunks.append( unicode(s[j:i]) )
  3808. elif revesc.has_key(c):
  3809. # Has a shortcut escape sequence, like "\n"
  3810. chunks.append(revesc[c])
  3811. i += 1
  3812. elif cord <= 0x1F:
  3813. # Always unicode escape ASCII-control characters
  3814. chunks.append(r'\u%04x' % cord)
  3815. i += 1
  3816. elif 0xD800 <= cord <= 0xDFFF:
  3817. # A raw surrogate character!
  3818. # This should ONLY happen in "narrow" Python builds
  3819. # where (sys.maxunicode == 65535) as Python itself
  3820. # uses UTF-16. But for "wide" Python builds, a raw
  3821. # surrogate should never happen.
  3822. handled_raw_surrogates = False
  3823. if sys.maxunicode == 0xFFFF and 0xD800 <= cord <= 0xDBFF and (i+1) < imax:
  3824. # In a NARROW Python, output surrogate pair as-is
  3825. hsurrogate = cord
  3826. i += 1
  3827. if tochar:
  3828. c = tochar(s[i])
  3829. else:
  3830. c = s[i]
  3831. cord = ord(c)
  3832. i += 1
  3833. if 0xDC00 <= cord <= 0xDFFF:
  3834. lsurrogate = cord
  3835. chunks.append(r'\u%04x\u%04x' % (hsurrogate,lsurrogate))
  3836. handled_raw_surrogates = True
  3837. if not handled_raw_surrogates:
  3838. cname = 'U+%04X' % cord
  3839. raise JSONEncodeError('can not include or escape a Unicode surrogate character',cname)
  3840. elif cord <= 0xFFFF:
  3841. # Other BMP Unicode character
  3842. if always_escape and c in always_escape:
  3843. doesc = True
  3844. elif unicodedata.category( c ) in ['Cc','Cf','Zl','Zp']:
  3845. doesc = True
  3846. elif callable(encunicode):
  3847. doesc = encunicode( c )
  3848. else:
  3849. doesc = encunicode
  3850. if doesc:
  3851. if optrevesc.has_key(c):
  3852. chunks.append(optrevesc[c])
  3853. else:
  3854. chunks.append(r'\u%04x' % cord)
  3855. else:
  3856. chunks.append( c )
  3857. i += 1
  3858. else: # ord(c) >= 0x10000
  3859. # Non-BMP Unicode
  3860. if always_escape and c in always_escape:
  3861. doesc = True
  3862. elif unicodedata.category( c ) in ['Cc','Cf','Zl','Zp']:
  3863. doesc = True
  3864. elif callable(encunicode):
  3865. doesc = encunicode( c )
  3866. else:
  3867. doesc = encunicode
  3868. if doesc:
  3869. for surrogate in helpers.unicode_as_surrogate_pair(c):
  3870. chunks.append(r'\u%04x' % ord(surrogate))
  3871. else:
  3872. chunks.append( c )
  3873. i += 1
  3874. chunks.append('"')
  3875. state.append( ''.join( chunks ) )
  3876. def decode_identifier(self, state, identifier_as_string=False):
  3877. """Decodes an identifier/keyword.
  3878. """
  3879. buf = state.buf
  3880. self.skipws(state)
  3881. start_position = buf.position
  3882. obj = None
  3883. kw = buf.pop_identifier()
  3884. if not kw:
  3885. state.push_error("Expected an identifier", position=start_position)
  3886. elif kw == 'null':
  3887. obj = None
  3888. state.stats.num_nulls += 1
  3889. elif kw == 'true':
  3890. obj = True
  3891. state.stats.num_bools += 1
  3892. elif kw == 'false':
  3893. obj = False
  3894. state.stats.num_bools += 1
  3895. elif kw == 'undefined':
  3896. state.push_cond( self.options.undefined_values,
  3897. "Strict JSON does not allow the 'undefined' keyword",
  3898. kw, position=start_position)
  3899. obj = undefined
  3900. state.stats.num_undefineds += 1
  3901. elif kw == 'NaN' or kw == 'Infinity':
  3902. state.push_cond( self.options.non_numbers,
  3903. "%s literals are not allowed in strict JSON" % kw,
  3904. kw, position=start_position)
  3905. if self.has_hook('decode_float'):
  3906. try:
  3907. val = self.call_hook( 'decode_float', kw, position=start_position )
  3908. except JSONSkipHook:
  3909. pass
  3910. except JSONError, err:
  3911. state.push_exception(err)
  3912. return undefined
  3913. else:
  3914. return val
  3915. elif self.has_hook('decode_number'):
  3916. try:
  3917. val = self.call_hook( 'decode_number', kw, position=start_position )
  3918. except JSONSkipHook:
  3919. pass
  3920. except JSONError, err:
  3921. state.push_exception(err)
  3922. return undefined
  3923. else:
  3924. return val
  3925. if kw == 'NaN':
  3926. state.stats.num_nans += 1
  3927. obj = state.options.nan
  3928. else:
  3929. state.stats.num_infinities += 1
  3930. obj = state.options.inf
  3931. else:
  3932. # Convert unknown identifiers into strings
  3933. if identifier_as_string:
  3934. if kw in helpers.javascript_reserved_words:
  3935. state.push_warning( "Identifier is a JavaScript reserved word",
  3936. kw, position=start_position)
  3937. state.push_cond( self.options.identifier_keys,
  3938. "JSON does not allow identifiers to be used as strings",
  3939. kw, position=start_position)
  3940. state.stats.num_identifiers += 1
  3941. obj = self.decode_javascript_identifier( kw )
  3942. else:
  3943. state.push_error("Unknown identifier", kw, position=start_position)
  3944. obj = undefined
  3945. state.stats.num_identifiers += 1
  3946. return obj
  3947. def skip_comment(self, state):
  3948. """Skips an ECMAScript comment, either // or /* style.
  3949. The contents of the comment are returned as a string, as well
  3950. as the index of the character immediately after the comment.
  3951. """
  3952. buf = state.buf
  3953. uniws = self.options.unicode_whitespace
  3954. s = buf.peekstr(2)
  3955. if s != '//' and s != '/*':
  3956. return None
  3957. state.push_cond( self.options.comments, 'Comments are not allowed in strict JSON' )
  3958. start_position = buf.position
  3959. buf.skip(2)
  3960. multiline = (s == '/*')
  3961. saw_close = False
  3962. while not buf.at_end:
  3963. if multiline:
  3964. if buf.peekstr(2) == '*/':
  3965. buf.skip(2)
  3966. saw_close = True
  3967. break
  3968. elif buf.peekstr(2) == '/*':
  3969. state.push_error('Multiline /* */ comments may not nest',
  3970. outer_position=start_position,
  3971. context='Comment')
  3972. else:
  3973. if buf.at_eol( uniws ):
  3974. buf.skip_to_next_line( uniws )
  3975. saw_close = True
  3976. break
  3977. buf.pop()
  3978. if not saw_close and multiline:
  3979. state.push_error('Comment was never terminated', outer_position=start_position,
  3980. context='Comment')
  3981. state.stats.num_comments += 1
  3982. def skipws_nocomments(self, state):
  3983. """Skips whitespace (will not allow comments).
  3984. """
  3985. return state.buf.skipws( not self.options.is_forbid_unicode_whitespace )
  3986. def skipws(self, state):
  3987. """Skips all whitespace, including comments and unicode whitespace
  3988. Takes a string and a starting index, and returns the index of the
  3989. next non-whitespace character.
  3990. If the 'skip_comments' behavior is True and not running in
  3991. strict JSON mode, then comments will be skipped over just like
  3992. whitespace.
  3993. """
  3994. buf = state.buf
  3995. uniws = not self.options.unicode_whitespace
  3996. while not buf.at_end:
  3997. c = buf.peekstr(2)
  3998. if c == '/*' or c == '//':
  3999. cmt = self.skip_comment( state )
  4000. elif buf.at_ws( uniws ):
  4001. buf.skipws( uniws )
  4002. else:
  4003. break
  4004. def decode_composite(self, state):
  4005. """Intermediate-level JSON decoder for composite literal types (array and object).
  4006. """
  4007. if state.should_stop:
  4008. return None
  4009. buf = state.buf
  4010. self.skipws(state)
  4011. opener = buf.peek()
  4012. if opener not in '{[':
  4013. state.push_error('Composite data must start with "[" or "{"')
  4014. return None
  4015. start_position = buf.position
  4016. buf.skip()
  4017. if opener == '[':
  4018. isdict = False
  4019. closer = ']'
  4020. obj = []
  4021. else:
  4022. isdict = True
  4023. closer = '}'
  4024. if state.options.sort_keys == SORT_PRESERVE and _OrderedDict:
  4025. obj = _OrderedDict()
  4026. else:
  4027. obj = {}
  4028. num_items = 0
  4029. self.skipws(state)
  4030. c = buf.peek()
  4031. if c == closer:
  4032. # empty composite
  4033. buf.skip()
  4034. done = True
  4035. else:
  4036. saw_value = False # set to false at beginning and after commas
  4037. done = False
  4038. while not done and not buf.at_end and not state.should_stop:
  4039. self.skipws(state)
  4040. c = buf.peek()
  4041. if c == '':
  4042. break # will report error futher down because done==False
  4043. elif c == ',':
  4044. if not saw_value:
  4045. # no preceeding value, an elided (omitted) element
  4046. if isdict:
  4047. state.push_error('Can not omit elements of an object (dictionary)',
  4048. outer_position=start_position,
  4049. context='Object')
  4050. else:
  4051. state.push_cond( self.options.omitted_array_elements,
  4052. 'Can not omit elements of an array (list)',
  4053. outer_position=start_position,
  4054. context='Array')
  4055. obj.append( undefined )
  4056. if state.stats:
  4057. state.stats.num_undefineds += 1
  4058. buf.skip() # skip over comma
  4059. saw_value = False
  4060. continue
  4061. elif c == closer:
  4062. if not saw_value:
  4063. if isdict:
  4064. state.push_cond( self.options.trailing_comma,
  4065. 'Strict JSON does not allow a final comma in an object (dictionary) literal',
  4066. outer_position=start_position,
  4067. context='Object')
  4068. else:
  4069. state.push_cond( self.options.trailing_comma,
  4070. 'Strict JSON does not allow a final comma in an array (list) literal',
  4071. outer_position=start_position,
  4072. context='Array')
  4073. buf.skip() # skip over closer
  4074. done = True
  4075. break
  4076. elif c in ']}':
  4077. if isdict:
  4078. cdesc='Object'
  4079. else:
  4080. cdesc='Array'
  4081. state.push_error("Expected a '%c' but saw '%c'" % (closer,c),
  4082. outer_position=start_position, context=cdesc)
  4083. done = True
  4084. break
  4085. if state.should_stop:
  4086. break
  4087. # Decode the item/value
  4088. value_position = buf.position
  4089. if isdict:
  4090. val = self.decodeobj(state, identifier_as_string=True)
  4091. else:
  4092. val = self.decodeobj(state, identifier_as_string=False)
  4093. if val is syntax_error:
  4094. recover_c = self.recover_parser(state)
  4095. if recover_c not in ':':
  4096. continue
  4097. if state.should_stop:
  4098. break
  4099. if saw_value:
  4100. # Two values without a separating comma
  4101. if isdict:
  4102. cdesc='Object'
  4103. else:
  4104. cdesc='Array'
  4105. state.push_error('Values must be separated by a comma',
  4106. position=value_position, outer_position=start_position,
  4107. context=cdesc)
  4108. saw_value = True
  4109. self.skipws(state)
  4110. if state.should_stop:
  4111. break
  4112. if isdict:
  4113. skip_item = False
  4114. key = val # Ref 11.1.5
  4115. key_position = value_position
  4116. if not helpers.isstringtype(key):
  4117. if helpers.isnumbertype(key):
  4118. state.push_cond( self.options.nonstring_keys,
  4119. 'JSON only permits string literals as object properties (keys)',
  4120. position=key_position, outer_position=start_position,
  4121. context='Object')
  4122. else:
  4123. state.push_error('Object properties (keys) must be string literals, numbers, or identifiers',
  4124. position=key_position, outer_position=start_position,
  4125. context='Object')
  4126. skip_item = True
  4127. c = buf.peek()
  4128. if c != ':':
  4129. state.push_error('Missing value for object property, expected ":"',
  4130. position=value_position, outer_position=start_position,
  4131. context='Object')
  4132. buf.skip() # skip over colon
  4133. self.skipws(state)
  4134. rval = self.decodeobj(state)
  4135. self.skipws(state)
  4136. if not skip_item:
  4137. if key in obj:
  4138. state.push_cond( self.options.duplicate_keys,
  4139. 'Object contains duplicate key',
  4140. key, position=key_position, outer_position=start_position,
  4141. context='Object')
  4142. if key == '':
  4143. state.push_cond( self.options.non_portable,
  4144. 'Using an empty string "" as an object key may not be portable',
  4145. position=key_position, outer_position=start_position,
  4146. context='Object')
  4147. obj[ key ] = rval
  4148. num_items += 1
  4149. else: # islist
  4150. obj.append( val )
  4151. num_items += 1
  4152. # end while
  4153. if state.stats:
  4154. if isdict:
  4155. state.stats.max_items_in_object = max(state.stats.max_items_in_object, num_items)
  4156. else:
  4157. state.stats.max_items_in_array = max(state.stats.max_items_in_array, num_items)
  4158. if state.should_stop:
  4159. return obj
  4160. # Make sure composite value is properly terminated
  4161. if not done:
  4162. if isdict:
  4163. state.push_error('Object literal (dictionary) is not terminated',
  4164. outer_position=start_position, context='Object')
  4165. else:
  4166. state.push_error('Array literal (list) is not terminated',
  4167. outer_position=start_position, context='Array')
  4168. # Update stats and run hooks
  4169. if isdict:
  4170. state.stats.num_objects += 1
  4171. if self.has_hook('decode_object'):
  4172. try:
  4173. obj = self.call_hook( 'decode_object', obj, position=start_position )
  4174. except JSONSkipHook:
  4175. pass
  4176. except JSONError, err:
  4177. state.push_exception(err)
  4178. obj = undefined
  4179. else:
  4180. state.stats.num_arrays += 1
  4181. if self.has_hook('decode_array'):
  4182. try:
  4183. obj = self.call_hook( 'decode_array', obj, position=start_position )
  4184. except JSONSkipHook:
  4185. pass
  4186. except JSONError, err:
  4187. state.push_exception(err)
  4188. obj = undefined
  4189. return obj
  def decode_javascript_identifier(self, name):
      """Map a JavaScript identifier name onto its Python representation.

      The default implementation hands the name back untouched, so an
      identifier is represented by whatever string object was passed in.
      Subclasses may override this method to produce a different kind of
      Python object for identifiers.

      """
      identifier = name
      return identifier
  def decodeobj(self, state, identifier_as_string=False, at_document_start=False):
      """Intermediate-level JSON decoder: decode one value from the input.

      Leading whitespace is skipped, then the next character of
      ``state.buf`` selects the specialised decoder to run:

        * '{' or '['        -- decode_composite() (nesting depth is tracked)
        * a string quote    -- decode_string()
        * digit, '.', '+', '-' -- decode_number()
        * letter, '_', '$'  -- decode_identifier()

      Anything else is reported as an error, one character is skipped,
      recover_parser() is invoked and the ``syntax_error`` sentinel is
      returned.

      'identifier_as_string' is forwarded to decode_identifier().  When
      'at_document_start' is true and the value is not an object or
      array, the 'any_type_at_start' option is consulted via push_cond().

      Returns the decoded Python object (or ``syntax_error``).

      """
      stream = state.buf
      self.skipws(state)
      if stream.at_end:
          # The error is recorded, but parsing still continues below
          # with whatever peek() reports at the end of the input.
          state.push_error('Unexpected end of input')
      ch = stream.peek()

      # Objects and arrays: keep the depth counter balanced even if the
      # composite decoder raises.
      if ch in '{[':
          state.cur_depth += 1
          try:
              state.update_depth_stats()
              return self.decode_composite(state)
          finally:
              state.cur_depth -= 1

      # Everything below is a non-composite (scalar-like) value.
      if at_document_start:
          state.push_cond( self.options.any_type_at_start,
                           'JSON document must start with an object or array type only' )

      if ch in self._string_quotes:
          return self.decode_string(state)
      if ch.isdigit() or ch in '.+-':
          return self.decode_number(state)
      if ch.isalpha() or ch in '_$':
          return self.decode_identifier(state, identifier_as_string=identifier_as_string)

      # Nothing recognisable starts here.
      state.push_error('Can not decode value starting with character %r' % ch)
      stream.skip()
      self.recover_parser(state)
      return syntax_error
  def decode(self, txt, encoding=None, return_errors=False, return_stats=False):
      """Decodes a JSON-encoded string into a Python object.

      'txt' and 'encoding' are handed to the decode state's set_input().

      The 'return_errors' parameter controls what happens if the
      input JSON has errors in it.

          * False: the first error whose severity is 'fatal' or 'error'
                   is raised as a Python exception (warnings do not
                   raise).  If there are none, the decoded Python object
                   is returned.

          * True: nothing is raised for recorded errors; the return
                  value is always a 'json_results' named tuple
                  (object, errors, stats).

      The 'return_stats' parameter controls whether the statistics
      object is included.  With return_stats set, a 'json_results'
      named tuple is returned even when return_errors is False (its
      'errors' field is then None); without it the 'stats' field is None.

      """
      import sys
      state = decode_state( options=self.options )

      # Prepare the input
      state.set_input( txt, encoding=encoding )

      # Do the decoding -- each stage only runs if the previous one
      # recorded no errors.
      if not state.has_errors:
          self.__sanity_check_start( state )

      if not state.has_errors:
          try:
              self._do_decode( state )  # DECODE!
          except JSONException, err:
              # Library errors are collected on the state rather than propagated.
              state.push_exception( err )
          except Exception, err:   # Mainly here to catch maximum recursion depth exceeded
              e2 = sys.exc_info()
              # NOTE(review): this bare re-raise propagates the original
              # exception unchanged, so the wrapping code below it is
              # unreachable.  Confirm whether the re-raise or the
              # JSONDecodeError wrapping is the intended behaviour.
              raise
              newerr = JSONDecodeError("An unexpected failure occured", severity='fatal', position=state.buf.position)
              newerr.__cause__ = err
              newerr.__traceback__ = e2[2]
              state.push_exception( newerr )

      # Copy the buffer-level counters into the statistics object.
      if return_stats and state.buf:
          state.stats.num_excess_whitespace = state.buf.num_ws_skipped
          state.stats.total_chars = state.buf.position.char_position

      # Handle the errors
      result_type = _namedtuple('json_results',['object','errors','stats'])

      if return_errors:
          if return_stats:
              return result_type(state.obj, state.errors, state.stats)
          else:
              return result_type(state.obj, state.errors, None)
      else:
          # Don't cause warnings to raise an error
          errors = [err for err in state.errors if err.severity in ('fatal','error')]
          if errors:
              raise errors[0]
          if return_stats:
              return result_type(state.obj, None, state.stats)
          else:
              return state.obj
  def __sanity_check_start(self, state):
      """Check that the document seems sane by looking at the first couple characters.

      Per RFC 4627 section 3:

          "Since the first two characters of a JSON text will
          always be ASCII characters [RFC0020], ..."
      [WAS removed from RFC 7158, but still valid via the grammar.]

      This check is probably not necessary, but it allows us to
      raise a suitably descriptive error rather than an obscure
      syntax error later on.

      Note that the RFC requirement of two ASCII characters seems
      to be an incorrect statement, as a JSON string literal may have
      as its first character any unicode character.  Thus the first
      two characters will always be ASCII, unless the first
      character is a quotation mark.  And in non-strict mode we can
      also have a few other characters too.

      The first two characters are peeked (not consumed) from
      ``state.buf``.  If the input looks like gibberish a fatal error
      is pushed onto 'state'.

      Returns True if the start of the document looks sane (or the
      input is shorter than two characters), and False if the fatal
      "gibberish" error was pushed.

      """
      is_sane = True
      unitxt = state.buf.peekstr(2)
      if len(unitxt) >= 2:
          first, second = unitxt[:2]
          if first in self._string_quotes:
              pass # second can be anything inside string literal
          else:
              if ((ord(first) < 0x20 or ord(first) > 0x7f) or \
                  (ord(second) < 0x20 or ord(second) > 0x7f)) and \
                  (not self.isws(first) and not self.isws(second)):
                  # Found non-printable ascii, must check unicode
                  # categories to see if the character is legal.
                  # Only whitespace, line and paragraph separators,
                  # and format control chars are legal here.
                  import unicodedata
                  catfirst = unicodedata.category(unicode(first))
                  catsecond = unicodedata.category(unicode(second))
                  if catfirst not in ('Zs','Zl','Zp','Cf') or \
                     catsecond not in ('Zs','Zl','Zp','Cf'):
                      state.push_fatal( 'The input is gibberish, is the Unicode encoding correct?' )
                      # Make the return value agree with the error just
                      # recorded (it used to stay True unconditionally).
                      is_sane = False
      return is_sane
  def _do_decode(self, state):
      """Run the actual JSON decoding for an already-prepared state.

      Called by the decode() method, after it has performed any Unicode
      decoding, etc.  The decoded value is stored in ``state.obj``;
      problems are reported through ``state.push_error()``.

      """
      stream = state.buf
      self.skipws(state)

      # Nothing but whitespace (or nothing at all) in the input.
      if stream.at_end:
          state.push_error('No value to decode')
          return

      # Decode under the configured decimal context when one is set,
      # otherwise under a do-nothing context manager.
      if not state.options.decimal_context:
          number_ctx = _dummy_context_manager
      else:
          number_ctx = decimal.localcontext( state.options.decimal_context )

      with number_ctx:
          state.obj = self.decodeobj(state, at_document_start=True )

      if state.should_stop:
          return

      # Make sure there's nothing at the end
      self.skipws(state)
      if not stream.at_end:
          state.push_error('Unexpected text after end of JSON value')
  def _classify_for_encoding( self, obj ):
      """Return a short string naming the kind of value 'obj' is.

      The result is one of: 'null', 'undefined', 'bool', 'number',
      'string', 'dict', 'namedtuple', 'sequence', 'datetime', 'date',
      'time', 'timedelta', 'bytes', 'memoryview', 'enum' or 'other'.
      The tests are order-sensitive (e.g. bool before number, datetime
      before date) and the first match wins.

      """
      import datetime

      # --- Simple / scalar kinds ---
      if obj is None:
          return 'null'
      if obj is undefined:
          return 'undefined'
      if isinstance(obj,bool):
          return 'bool'
      if isinstance(obj, (int,long,float,complex)) or\
         (decimal and isinstance(obj, decimal.Decimal)):
          return 'number'
      if isinstance(obj, basestring) or helpers.isstringtype(obj):
          return 'string'

      # --- Containers ---
      if isinstance(obj,dict):
          return 'dict'
      if isinstance(obj,tuple) and hasattr(obj,'_asdict') and callable(obj._asdict):
          # Have a named tuple; the option decides whether it is encoded
          # as an object or as a plain sequence.
          as_object = self.options.encode_namedtuple_as_object
          if as_object and (as_object is True or (callable(as_object) and as_object(obj))):
              return 'namedtuple'
          return 'sequence'
      if isinstance(obj, (list,tuple,set,frozenset)):
          return 'sequence'
      if hasattr(obj,'iterkeys') or (hasattr(obj,'__getitem__') and hasattr(obj,'keys')):
          return 'dict'

      # --- Date and time kinds ---
      if isinstance(obj, datetime.datetime):
          # Check datetime before date because it is a subclass!
          return 'datetime'
      if isinstance(obj, datetime.date):
          return 'date'
      if isinstance(obj, datetime.time):
          return 'time'
      if isinstance(obj, datetime.timedelta):
          return 'timedelta'

      # --- Kinds that are only tested for on Python 3 / when available ---
      if _py_major >= 3 and isinstance(obj,(bytes,bytearray)):
          return 'bytes'
      if _py_major >= 3 and isinstance(obj,memoryview):
          return 'memoryview'
      if _enum is not None and isinstance(obj,_enum):
          return 'enum'

      return 'other'
  def encode(self, obj, encoding=None ):
      """Encodes the Python object into a JSON string representation.

      This method will first attempt to encode an object by seeing
      if it has a json_equivalent() method.  If so then it will
      call that method and then recursively attempt to encode
      the object resulting from that call.

      Next it will attempt to determine if the object is a native
      type or acts like a sequence or dictionary.  If so it will
      encode that object directly.

      Finally, if no other strategy for encoding the object of that
      type exists, it will call the encode_default() method.  That
      method currently raises an error, but it could be overridden
      by subclasses to provide a hook for extending the types which
      can be encoded.

      'encoding' may be None (the combined text is returned as-is), a
      codecs.CodecInfo object, or a value that helpers.lookup_codec()
      can resolve; in the latter two cases the result of the codec's
      encode() is returned.

      Raises JSONEncodeError if no codec can be found for 'encoding',
      if the codec cannot encode the JSON syntax characters, or if the
      final output cannot be encoded.

      """
      import sys, codecs

      # Make a fresh encoding state
      state = encode_state( self.options )

      # Find the codec to use. CodecInfo will be in 'cdk' and name in 'encoding'.
      #
      # Also set the state's 'escape_unicode_test' property which is used to
      # determine what characters to \u-escape.
      if encoding is None:
          cdk = None
      elif isinstance(encoding, codecs.CodecInfo):
          cdk = encoding
          encoding = cdk.name
      else:
          cdk = helpers.lookup_codec( encoding )
          if not cdk:
              raise JSONEncodeError('no codec available for character encoding',encoding)

      if self.options.escape_unicode and callable(self.options.escape_unicode):
          # User-supplied repertoire test function
          state.escape_unicode_test = self.options.escape_unicode
      else:
          if self.options.escape_unicode==True or not cdk or cdk.name.lower() == 'ascii':
              # Escaping forced by the option, no codec at all, or the
              # ASCII codec -- \u escape anything not ASCII
              state.escape_unicode_test = lambda c: ord(c) >= 0x80
          elif cdk.name == 'iso8859-1':
              # ISO 8859-1 -- only escape what lies beyond U+00FF.
              # NOTE(review): unlike the neighbouring tests this name
              # comparison is case-sensitive (no .lower()).
              state.escape_unicode_test = lambda c: ord(c) >= 0x100
          elif cdk and cdk.name.lower().startswith('utf'):
              # All UTF-x encodings can do the whole Unicode repertoire, so
              # do nothing special.
              state.escape_unicode_test = False
          else:
              # An unusual codec.  We need to test every character
              # to see if it is in the codec's repertoire to determine
              # if we should \u escape that character.
              enc_func = cdk.encode
              def escape_unicode_hardway( c ):
                  # True means "this character must be \u escaped".
                  try:
                      enc_func( c )
                  except UnicodeEncodeError:
                      return True
                  else:
                      return False
              state.escape_unicode_test = escape_unicode_hardway

      # Make sure the encoding is not degenerate: it can encode the minimal
      # number of characters needed by the JSON syntax rules.
      if encoding is not None:
          try:
              output, nchars = cdk.encode( JSON.json_syntax_characters )
          except UnicodeError, err:
              raise JSONEncodeError("Output encoding %s is not sufficient to encode JSON" % cdk.name)

      # Do the JSON encoding!
      self._do_encode( obj, state )
      if not self.options.encode_compactly:
          # Non-compact output is terminated with a newline.
          state.append('\n')
      unitxt = state.combine()

      # Do the final Unicode encoding
      if encoding is None:
          output = unitxt
      else:
          try:
              output, nchars = cdk.encode( unitxt )
          except UnicodeEncodeError, err:
              # Re-raise as a JSONEncodeError
              e2 = sys.exc_info()
              newerr = JSONEncodeError("a Unicode encoding error occurred")
              # Simulate Python 3's: "raise X from Y" exception chaining
              newerr.__cause__ = err
              newerr.__traceback__ = e2[2]
              raise newerr
      return output
  4467. def _do_encode(self, obj, state):
  4468. """Internal encode function."""
  4469. obj_classification = self._classify_for_encoding( obj )
  4470. if self.has_hook('encode_value'):
  4471. orig_obj = obj
  4472. try:
  4473. obj = self.call_hook( 'encode_value', obj )
  4474. except JSONSkipHook:
  4475. pass
  4476. if obj is not orig_obj:
  4477. prev_cls = obj_classification
  4478. obj_classification = self._classify_for_encoding( obj )
  4479. if obj_classification != prev_cls:
  4480. # Got a different type of object, re-encode again
  4481. self._do_encode( obj, state )
  4482. return
  4483. if hasattr(obj, 'json_equivalent'):
  4484. success = self.encode_equivalent( obj, state )
  4485. if success:
  4486. return
  4487. if obj_classification == 'null':
  4488. self.encode_null( state )
  4489. elif obj_classification == 'undefined':
  4490. if not self.options.is_forbid_undefined_values:
  4491. self.encode_undefined( state )
  4492. else:
  4493. raise JSONEncodeError('strict JSON does not permit "undefined" values')
  4494. elif obj_classification == 'bool':
  4495. self.encode_boolean( obj, state )
  4496. elif obj_classification == 'number':
  4497. try:
  4498. self.encode_number( obj, state )
  4499. except JSONEncodeError, err1:
  4500. # Bad number, probably a complex with non-zero imaginary part.
  4501. # Let the default encoders take a shot at encoding.
  4502. try:
  4503. self.try_encode_default(obj, state)
  4504. except Exception, err2:
  4505. # Default handlers couldn't deal with it, re-raise original exception.
  4506. raise err1
  4507. elif obj_classification == 'string':
  4508. self.encode_string( obj, state )
  4509. elif obj_classification == 'enum': # Python 3.4 enum.Enum
  4510. self.encode_enum( obj, state )
  4511. elif obj_classification == 'datetime': # Python datetime.datetime
  4512. self.encode_datetime( obj, state )
  4513. elif obj_classification == 'date': # Python datetime.date
  4514. self.encode_date( obj, state )
  4515. elif obj_classification == 'time': # Python datetime.time
  4516. self.encode_time( obj, state )
  4517. elif obj_classification == 'timedelta': # Python datetime.time
  4518. self.encode_timedelta( obj, state )
  4519. else:
  4520. # Anything left is probably composite, or an unconvertable type.
  4521. self.encode_composite( obj, state )
  def encode_enum(self, val, state):
      """Encode a Python Enum value into JSON.

      The 'encode_enum_as' option selects the representation:

        * 'qname' -- the string form of the member, str(val)
        * 'value' -- the member's value, encoded recursively
        * anything else ('name') -- the member's name as a string

      'val' is the enum member and 'state' the encode state that
      receives the output.

      """
      eas = self.options.encode_enum_as
      # The body previously referred to an undefined name 'obj' instead
      # of the 'val' parameter, which raised NameError on every call.
      if eas == 'qname':
          self.encode_string( str(val), state )
      elif eas == 'value':
          self._do_encode( val.value, state )
      else:  # eas == 'name'
          self.encode_string( val.name, state )
  def encode_date(self, dt, state):
      """Encode a date as a JSON string.

      The strftime pattern comes from the 'date_format' option; an
      unset option or the value 'iso' selects '%Y-%m-%d'.

      """
      pattern = self.options.date_format
      use_iso = (not pattern) or pattern == 'iso'
      if use_iso:
          pattern = '%Y-%m-%d'
      formatted = dt.strftime(pattern)
      self.encode_string( formatted, state )
  def encode_datetime(self, dt, state):
      """Encode a datetime as a JSON string.

      The strftime pattern comes from the 'datetime_format' option.  An
      unset option or the value 'iso' selects an ISO-style pattern
      ('%Y-%m-%dT%H:%M:%S%z', with '.%f' added when the microsecond
      field is non-zero).  In ISO mode only, a trailing '-00:00' or
      '+00:00' offset is replaced by 'Z'.

      """
      fmt = self.options.datetime_format
      is_iso = not fmt or fmt == 'iso'
      if is_iso:
          if dt.microsecond == 0:
              fmt = '%Y-%m-%dT%H:%M:%S%z'
          else:
              fmt = '%Y-%m-%dT%H:%M:%S.%f%z'
      s = dt.strftime(fmt)
      # The parentheses matter: without them 'and' binds tighter than
      # 'or', so output from a user-supplied format that happened to end
      # in '+00:00' was rewritten as well.
      # NOTE(review): strftime's %z is documented to emit offsets without
      # a colon (e.g. '+0000'), so this rewrite may never trigger for the
      # ISO patterns above -- confirm the intended UTC spelling.
      if is_iso and (s.endswith('-00:00') or s.endswith('+00:00')):
          s = s[:-6] + 'Z' # Change UTC to use 'Z' notation
      self.encode_string( s, state )
  def encode_time(self, t, state):
      """Encode a time-of-day value as a JSON string.

      An unset format option or the value 'iso' selects an ISO-style
      pattern ('T%H:%M:%S%z', with '.%f' added when the microsecond
      field is non-zero).  In ISO mode only, a trailing '-00:00' or
      '+00:00' offset is replaced by 'Z'.

      """
      # NOTE(review): this reads the 'datetime_format' option; confirm
      # whether a time-specific format option was intended here.
      fmt = self.options.datetime_format
      is_iso = not fmt or fmt == 'iso'
      if is_iso:
          # Was 'dt.microsecond': 'dt' is not defined in this method (the
          # parameter is 't'), so ISO mode always raised NameError.
          if t.microsecond == 0:
              fmt = 'T%H:%M:%S%z'
          else:
              fmt = 'T%H:%M:%S.%f%z'
      s = t.strftime(fmt)
      # Parenthesised so that the 'Z' rewrite only applies in ISO mode;
      # 'and' binds tighter than 'or'.
      if is_iso and (s.endswith('-00:00') or s.endswith('+00:00')):
          s = s[:-6] + 'Z' # Change UTC to use 'Z' notation
      self.encode_string( s, state )
  def encode_timedelta(self, td, state):
      """Encode a timedelta as a JSON string.

      The 'timedelta_format' option selects the text form: unset or
      'iso' uses helpers.format_timedelta_iso(), 'hms' uses str(td).
      Any other value raises ValueError.

      """
      style = self.options.timedelta_format
      if style == 'hms':
          text = str(td)
      elif not style or style == 'iso':
          text = helpers.format_timedelta_iso( td )
      else:
          raise ValueError("Unknown timedelta_format %r" % style)
      self.encode_string( text, state )
  def encode_composite(self, obj, state, obj_classification=None):
      """Encodes just composite objects: dictionaries, lists, or sequences.

      Basically handles any python type for which iter() can create
      an iterator object.

      'obj_classification' may carry a classification already computed
      by the caller; when it is not given the object is classified
      here.  Output is appended to 'state'.

      If no iterator can be obtained for the object, the work is
      delegated to try_encode_default().

      Raises JSONEncodeError if the object yields itself while being
      iterated, or if a dictionary key is not an acceptable key type.

      This method is not intended to be called directly.  Use the
      encode() method instead.

      """
      import sys  # not referenced in this method body
      if not obj_classification:
          obj_classification = self._classify_for_encoding(obj)

      # Convert namedtuples to dictionaries
      if obj_classification == 'namedtuple':
          obj = obj._asdict()
          obj_classification = 'dict'

      # Convert 'unsigned byte' memory views into plain bytes
      if obj_classification == 'memoryview' and obj.format == 'B':
          obj = obj.tobytes()
          obj_classification = 'bytes'

      # Run hooks.  Only dicts, sequences and bytes have a hook; for any
      # other classification hook_name stays None.
      hook_name = None
      if obj_classification == 'dict':
          hook_name = 'encode_dict'
      elif obj_classification == 'sequence':
          hook_name = 'encode_sequence'
      elif obj_classification == 'bytes':
          hook_name = 'encode_bytes'

      # NOTE(review): has_hook() may be called with None here; presumably
      # it reports False for that -- confirm.
      if self.has_hook(hook_name):
          try:
              new_obj = self.call_hook( hook_name, obj )
          except JSONSkipHook:
              pass
          else:
              if new_obj is not obj:
                  obj = new_obj
                  prev_cls = obj_classification
                  obj_classification = self._classify_for_encoding( obj )
                  if obj_classification != prev_cls:
                      # Transformed to a different kind of object, call
                      # back to the general encode() method.
                      self._do_encode( obj, state )
                      return
                  # Else, fall through

      # At this point we have decided to output either an object or an array
      isdict = (obj_classification == 'dict')

      # Get iterator: over the keys for dict-like objects that provide
      # iterkeys(), otherwise over the object itself.
      it = None
      if isdict and hasattr(obj,'iterkeys'):
          try:
              it = obj.iterkeys()
          except AttributeError:
              pass
      else:
          try:
              it = iter(obj)
          except TypeError:
              pass

      # Convert each member to JSON
      if it is not None:
          # Try to get length, but don't fail if we can't
          try:
              numitems = len(obj)
          except TypeError:
              numitems = 0

          # Output the opening bracket or brace
          compactly = self.options.encode_compactly
          if not compactly:
              # Indentation of the enclosing level (for the closer) and
              # of the members one level deeper.
              indent0 = self.options.indentation_for_level( state.nest_level )
              indent = self.options.indentation_for_level( state.nest_level+1 )

          spaces_after_opener = ''
          if isdict:
              opener = '{'
              closer = '}'
              if compactly:
                  dictcolon = ':'
              else:
                  dictcolon = ' : '
          else:
              opener = '['
              closer = ']'
              if not compactly:
                  #opener = opener + ' '
                  spaces_after_opener = self.options.spaces_to_next_indent_level(subtract=len(opener))

          state.append( opener )
          state.append( spaces_after_opener )

          # Now iterate through all the items and collect their representations
          parts = []  # Collects each of the members (one substate per member)
          part_keys = [] # For dictionary key sorting, tuples (key,index)

          try: # while not StopIteration
              part_idx = 0
              while True:
                  obj2 = it.next()
                  part_idx += 1 # Note, will start counting at 1
                  if obj2 is obj:
                      raise JSONEncodeError('trying to encode an infinite sequence',obj)
                  if isdict:
                      # Look up the value with the original key, before
                      # any hook gets a chance to transform the key.
                      obj3 = obj[obj2]
                      # Dictionary key is in obj2 and value in obj3.

                      # Let any hooks transform the key.
                      if self.has_hook('encode_value'):
                          try:
                              newobj = self.call_hook( 'encode_value', obj2 )
                          except JSONSkipHook:
                              pass
                          else:
                              obj2 = newobj
                      if self.has_hook('encode_dict_key'):
                          try:
                              newkey = self.call_hook( 'encode_dict_key', obj2 )
                          except JSONSkipHook:
                              pass
                          else:
                              obj2 = newkey

                      # Check JSON restrictions on key types
                      if not helpers.isstringtype(obj2):
                          if helpers.isnumbertype(obj2):
                              if not self.options.is_allow_nonstring_keys:
                                  raise JSONEncodeError('object properties (dictionary keys) must be strings in strict JSON',obj2)
                          else:
                              raise JSONEncodeError('object properties (dictionary keys) can only be strings or numbers in ECMAScript',obj2)
                      # Remember the key with the zero-based position of
                      # its entry in 'parts' so sorting can reorder them.
                      part_keys.append( (obj2, part_idx-1) )

                  # Encode this item (or key, then value) into its own
                  # substate and collect it in 'parts'
                  substate = state.make_substate()
                  self._do_encode( obj2, substate )
                  if isdict:
                      substate.append( dictcolon )
                      substate2 = substate.make_substate()
                      self._do_encode( obj3, substate2 )
                      substate.join_substate( substate2 )
                  parts.append( substate )
                  # Next item iteration
          except StopIteration:
              # Normal loop exit: the iterator is exhausted.
              pass

          # Sort dictionary keys
          if isdict:
              srt = self.options.sort_keys
              if srt == SORT_PRESERVE:
                  if _OrderedDict and isinstance(obj,_OrderedDict):
                      srt = SORT_NONE  # Will keep order
                  else:
                      srt = SORT_SMART

              if not srt or srt in (SORT_NONE, SORT_PRESERVE):
                  srt = None
              elif callable(srt):
                  # User-supplied key function; the key itself is the tie-breaker.
                  part_keys.sort( key=(lambda t: (srt(t[0]),t[0])) )
              elif srt == SORT_SMART:
                  part_keys.sort( key=(lambda t: (smart_sort_transform(t[0]),t[0])) )
              elif srt == SORT_ALPHA_CI:
                  part_keys.sort( key=(lambda t: (unicode(t[0]).upper(),t[0])) )
              elif srt or srt == SORT_ALPHA:
                  # Any remaining true value is treated like SORT_ALPHA.
                  part_keys.sort( key=(lambda t: unicode(t[0])) )
              # Now make parts match the new sort order
              if srt is not None:
                  parts = [parts[pk[1]] for pk in part_keys]

          # Choose the separator: tight, on one line, or one member per line.
          if compactly:
              sep = ','
          elif len(parts) <= self.options.max_items_per_line:
              sep = ', '
          else:
              #state.append(spaces_after_opener)
              state.append('\n' + indent)
              sep = ',\n' + indent

          for pnum, substate in enumerate(parts):
              if pnum > 0:
                  state.append( sep )
              state.join_substate( substate )

          if not compactly:
              # NOTE(review): this test uses numitems (len(obj), or 0 when
              # len() is unavailable) while the separator choice above uses
              # len(parts); the two can differ -- confirm this is intended.
              if numitems > self.options.max_items_per_line:
                  state.append('\n' + indent0)
              else:
                  state.append(' ')
          state.append(closer)  # final '}' or ']'
      else: # Can't create an iterator for the object
          self.try_encode_default( obj, state )
  def encode_equivalent( self, obj, state ):
      """Encode a user-defined object through its json_equivalent() method.

      The object being encoded should have a json_equivalent() method
      which returns another, equivalent object that is easily
      JSON-encoded.  That equivalent is then encoded into 'state' and
      True is returned.

      If the object has no callable json_equivalent attribute, nothing
      is encoded and False is returned so that the caller can attempt
      the next strategy.  A json_equivalent() that returns the very same
      object raises JSONEncodeError.

      If a caller wishes to disable the calling of json_equivalent()
      methods, then subclass this class and override this method
      to just return a false value.

      """
      if not (hasattr(obj, 'json_equivalent')
              and callable(getattr(obj,'json_equivalent'))):
          return False

      substitute = obj.json_equivalent()
      if substitute is obj:
          # Try to prevent careless infinite recursion
          raise JSONEncodeError('object has a json_equivalent() method that returns itself',obj)
      self._do_encode( substitute, state )
      return True
  def try_encode_default( self, obj, state ):
      """Last-resort encoder for objects nothing else could handle.

      If an 'encode_default' hook is installed and it returns a
      different object, that object is encoded via _do_encode() and its
      result is returned.  In every other case (no hook, the hook was
      skipped, or the hook returned the same object) JSONEncodeError is
      raised.

      """
      if self.has_hook('encode_default'):
          try:
              replacement = self.call_hook( 'encode_default', obj )
          except JSONSkipHook:
              pass
          else:
              if replacement is not obj:
                  # Hook made a transformation, re-encode it
                  return self._do_encode( replacement, state )

      # End of the road.
      raise JSONEncodeError('can not encode object into a JSON representation',obj)
  4777. # ------------------------------
  def encode( obj, encoding=None, **kwargs ):
      r"""Encodes a Python object into a JSON-encoded string.

      This is a convenience wrapper: a JSON instance is built from the
      keyword arguments and its encode() method does the work.

      * 'strict'    (Boolean, default False)

          If 'strict' is set to True, then only strictly-conforming JSON
          output will be produced.  Note that this means that some types
          of values may not be convertable and will result in a
          JSONEncodeError exception.

      * 'compactly'    (Boolean, default True)

          If 'compactly' is set to True, then the resulting string will
          have all extraneous white space removed; if False then the
          string will be "pretty printed" with whitespace and
          indentation added to make it more readable.

      * 'encode_namedtuple_as_object'  (Boolean or callable, default True)

          If True, then objects of type namedtuple, or subclasses of
          'tuple' that have an _asdict() method, will be encoded as an
          object rather than an array.
          It can also be a predicate function that takes a namedtuple
          object as an argument and returns True or False.

      * 'indent_amount'   (Integer, default 2)

          The number of spaces to output for each indentation level.
          If 'compactly' is True then indentation is ignored.

      * 'indent_limit'    (Integer or None, default None)

          If not None, then this is the maximum limit of indentation
          levels, after which further indentation spaces are not
          inserted.  If None, then there is no limit.

      CONCERNING CHARACTER ENCODING:

      The 'encoding' argument should be one of:

          * None - The return will be a Unicode string.
          * encoding_name - A string which is the name of a known
                encoding, such as 'UTF-8' or 'ascii'.
          * codec - A CodecInfo object, such as found by codecs.lookup().
                This allows you to use a custom codec as well as those
                built into Python.

      If an encoding is given (either by name or by codec), then the
      returned value will be a byte array (Python 3), or a 'str' string
      (Python 2); which represents the raw set of bytes.  Otherwise,
      if encoding is None, then the returned value will be a Unicode
      string.

      The 'escape_unicode' argument is used to determine which characters
      in string literals must be \u escaped.  Should be one of:

          * True  -- All non-ASCII characters are always \u escaped.
          * False -- Try to insert actual Unicode characters if possible.
          * function -- A user-supplied function that accepts a single
               unicode character and returns True or False; where True
               means to \u escape that character.

      Regardless of escape_unicode, certain characters will always be
      \u escaped.  Additionally any characters not in the output encoding
      repertoire for the encoding codec will be \u escaped as well.

      """
      # Do the JSON encoding
      encoder = JSON( **kwargs )
      return encoder.encode( obj, encoding )
  4831. def decode( txt, encoding=None, **kwargs ):
  4832. """Decodes a JSON-encoded string into a Python object.
  4833. == Optional arguments ==
  4834. * 'encoding' (string, default None)
  4835. This argument provides a hint regarding the character encoding
  4836. that the input text is assumed to be in (if it is not already a
  4837. unicode string type).
  4838. If set to None then autodetection of the encoding is attempted
  4839. (see discussion above). Otherwise this argument should be the
  4840. name of a registered codec (see the standard 'codecs' module).
  4841. * 'strict' (Boolean, default False)
  4842. If 'strict' is set to True, then those strings that are not
  4843. entirely strictly conforming to JSON will result in a
  4844. JSONDecodeError exception.
  4845. * 'return_errors' (Boolean, default False)
  4846. Controls the return value from this function. If False, then
  4847. only the Python equivalent object is returned on success, or
  4848. an error will be raised as an exception.
  4849. If True then a 2-tuple is returned: (object, error_list). The
  4850. error_list will be an empty list [] if the decoding was
  4851. successful, otherwise it will be a list of all the errors
  4852. encountered. Note that it is possible for an object to be
  4853. returned even if errors were encountered.
  4854. * 'return_stats' (Boolean, default False)
  4855. Controls whether statistics about the decoded JSON document
  4856. are returns (and instance of decode_statistics).
  4857. If True, then the stats object will be added to the end of the
  4858. tuple returned. If return_errors is also set then a 3-tuple
  4859. is returned, otherwise a 2-tuple is returned.
  4860. * 'write_errors' (Boolean OR File-like object, default False)
  4861. Controls what to do with errors.
  4862. - If False, then the first decoding error is raised as an exception.
  4863. - If True, then errors will be printed out to sys.stderr.
  4864. - If a File-like object, then errors will be printed to that file.
  4865. The write_errors and return_errors arguments can be set
  4866. independently.
  4867. * 'filename_for_errors' (string or None)
  4868. Provides a filename to be used when writting error messages.
  4869. * 'allow_xxx', 'warn_xxx', and 'forbid_xxx' (Booleans)
  4870. These arguments allow for fine-adjustments to be made to the
  4871. 'strict' argument, by allowing or forbidding specific
  4872. syntaxes.
  4873. There are many of these arguments, named by replacing the
  4874. "xxx" with any number of possible behavior names (See the JSON
  4875. class for more details).
  4876. Each of these will allow (or forbid) the specific behavior,
  4877. after the evaluation of the 'strict' argument. For example,
  4878. if strict=True then by also passing 'allow_comments=True' then
  4879. comments will be allowed. If strict=False then
  4880. forbid_comments=True will allow everything except comments.
  4881. Unicode decoding:
  4882. -----------------
  4883. The input string can be either a python string or a python unicode
  4884. string (or a byte array in Python 3). If it is already a unicode
  4885. string, then it is assumed that no character set decoding is
  4886. required.
  4887. However, if you pass in a non-Unicode text string (a Python 2
  4888. 'str' type or a Python 3 'bytes' or 'bytearray') then an attempt
  4889. will be made to auto-detect and decode the character encoding.
  4890. This will be successful if the input was encoded in any of UTF-8,
  4891. UTF-16 (BE or LE), or UTF-32 (BE or LE), and of course plain ASCII
  4892. works too.
  4893. Note though that if you know the character encoding, then you
  4894. should convert to a unicode string yourself, or pass it the name
  4895. of the 'encoding' to avoid the guessing made by the auto
  4896. detection, as with
  4897. python_object = demjson.decode( input_bytes, encoding='utf8' )
  4898. Callback hooks:
  4899. ---------------
  4900. You may supply callback hooks by using the hook name as the
  4901. named argument, such as:
  4902. decode_float=decimal.Decimal
  4903. See the hooks documentation on the JSON.set_hook() method.
  4904. """
  4905. import sys
  4906. # Initialize the JSON object
  4907. return_errors = False
  4908. return_stats = False
  4909. write_errors = False
  4910. filename_for_errors = None
  4911. write_stats = False
  4912. kwargs = kwargs.copy()
  4913. todel = []
  4914. for kw,val in kwargs.items():
  4915. if kw == "return_errors":
  4916. return_errors = bool(val)
  4917. todel.append(kw)
  4918. elif kw == 'return_stats':
  4919. return_stats = bool(val)
  4920. todel.append(kw)
  4921. elif kw == "write_errors":
  4922. write_errors = val
  4923. todel.append(kw)
  4924. elif kw == "filename_for_errors":
  4925. filename_for_errors = val
  4926. todel.append(kw)
  4927. elif kw == "write_stats":
  4928. write_stats = val
  4929. todel.append(kw)
  4930. # next keyword argument
  4931. for kw in todel:
  4932. del kwargs[kw]
  4933. j = JSON( **kwargs )
  4934. # Now do the actual JSON decoding
  4935. result = j.decode( txt,
  4936. encoding=encoding,
  4937. return_errors=(return_errors or write_errors),
  4938. return_stats=(return_stats or write_stats) )
  4939. if write_errors:
  4940. import sys
  4941. if write_errors is True:
  4942. write_errors = sys.stderr
  4943. for err in result.errors:
  4944. write_errors.write( err.pretty_description(filename=filename_for_errors) + "\n" )
  4945. if write_stats:
  4946. import sys
  4947. if write_stats is True:
  4948. write_stats = sys.stderr
  4949. if result.stats:
  4950. write_stats.write( "%s----- Begin JSON statistics\n" % filename_for_errors )
  4951. write_stats.write( result.stats.pretty_description( prefix=" | " ) )
  4952. write_stats.write( "%s----- End of JSON statistics\n" % filename_for_errors )
  4953. return result
  4954. def encode_to_file( filename, obj, encoding='utf-8', overwrite=False, **kwargs ):
  4955. """Encodes a Python object into JSON and writes into the given file.
  4956. If no encoding is given, then UTF-8 will be used.
  4957. See the encode() function for a description of other possible options.
  4958. If the file already exists and the 'overwrite' option is not set
  4959. to True, then the existing file will not be overwritten. (Note,
  4960. there is a subtle race condition in the check so there are
  4961. possible conditions in which a file may be overwritten)
  4962. """
  4963. import os, errno
  4964. if not encoding:
  4965. encoding = 'utf-8'
  4966. if not isinstance(filename,basestring) or not filename:
  4967. raise TypeError("Expected a file name")
  4968. if not overwrite and os.path.exists(filename):
  4969. raise IOError(errno.EEXIST, "File exists: %r" % filename)
  4970. jsondata = encode( obj, encoding=encoding, **kwargs )
  4971. try:
  4972. fp = open(filename, 'wb')
  4973. except Exception:
  4974. raise
  4975. else:
  4976. try:
  4977. fp.write( jsondata )
  4978. finally:
  4979. fp.close()
  4980. def decode_file( filename, encoding=None, **kwargs ):
  4981. """Decodes JSON found in the given file.
  4982. See the decode() function for a description of other possible options.
  4983. """
  4984. if isinstance(filename,basestring):
  4985. try:
  4986. fp = open(filename, 'rb')
  4987. except Exception:
  4988. raise
  4989. else:
  4990. try:
  4991. jsondata = fp.read()
  4992. finally:
  4993. fp.close()
  4994. else:
  4995. raise TypeError("Expected a file name")
  4996. return decode( jsondata, encoding=encoding, **kwargs )
  4997. # ======================================================================
  4998. class jsonlint(object):
  4999. """This class contains most of the logic for the "jsonlint" command.
  5000. You generally create an instance of this class, to defined the
  5001. program's environment, and then call the main() method. A simple
  5002. wrapper to turn this into a script might be:
  5003. import sys, demjson
  5004. if __name__ == '__main__':
  5005. lint = demjson.jsonlint( sys.argv[0] )
  5006. return lint.main( sys.argv[1:] )
  5007. """
  5008. _jsonlint_usage = r"""Usage: %(program_name)s [<options> ...] [--] inputfile.json ...
  5009. With no input filename, or "-", it will read from standard input.
  5010. The return status will be 0 if the file is conforming JSON (per the
  5011. RFC 7159 specification), or non-zero otherwise.
  5012. GENERAL OPTIONS:
  5013. -v | --verbose Show details of lint checking
  5014. -q | --quiet Don't show any output (except for reformatting)
  5015. STRICTNESS OPTIONS (WARNINGS AND ERRORS):
  5016. -W | --tolerant Be tolerant, but warn about non-conformance (default)
  5017. -s | --strict Be strict in what is considered conforming JSON
  5018. -S | --nonstrict Be tolerant in what is considered conforming JSON
  5019. --allow=... -\
  5020. --warn=... |-- These options let you pick specific behaviors.
  5021. --forbid=... -/ Use --help-behaviors for more
  5022. STATISTICS OPTIONS:
  5023. --stats Show statistics about JSON document
  5024. REFORMATTING OPTIONS:
  5025. -f | --format Reformat the JSON text (if conforming) to stdout
  5026. -F | --format-compactly
  5027. Reformat the JSON simlar to -f, but do so compactly by
  5028. removing all unnecessary whitespace
  5029. -o filename | --output filename
  5030. The filename to which reformatted JSON is to be written.
  5031. Without this option the standard output is used.
  5032. --[no-]keep-format Try to preserve numeric radix, e.g., hex, octal, etc.
  5033. --html-safe Escape characters that are not safe to embed in HTML/XML.
  5034. --sort <kind> How to sort object/dictionary keys, <kind> is one of:
  5035. %(sort_options_help)s
  5036. --indent tabs | <nnn> Number of spaces to use per indentation level,
  5037. or use tab characters if "tabs" given.
  5038. UNICODE OPTIONS:
  5039. -e codec | --encoding=codec Set both input and output encodings
  5040. --input-encoding=codec Set the input encoding
  5041. --output-encoding=codec Set the output encoding
  5042. These options set the character encoding codec (e.g., "ascii",
  5043. "utf-8", "utf-16"). The -e will set both the input and output
  5044. encodings to the same thing. The output encoding is used when
  5045. reformatting with the -f or -F options.
  5046. Unless set, the input encoding is guessed and the output
  5047. encoding will be "utf-8".
  5048. OTHER OPTIONS:
  5049. --recursion-limit=nnn Set the Python recursion limit to number
  5050. --leading-zero-radix=8|10 The radix to use for numbers with leading
  5051. zeros. 8=octal, 10=decimal.
  5052. REFORMATTING / PRETTY-PRINTING:
  5053. When reformatting JSON with -f or -F, output is only produced if
  5054. the input passed validation. By default the reformatted JSON will
  5055. be written to standard output, unless the -o option was given.
  5056. The default output codec is UTF-8, unless an encoding option is
  5057. provided. Any Unicode characters will be output as literal
  5058. characters if the encoding permits, otherwise they will be
  5059. \u-escaped. You can use "--output-encoding ascii" to force all
  5060. Unicode characters to be escaped.
  5061. MORE INFORMATION:
  5062. Use '%(program_name)s --version [-v]' to see versioning information.
  5063. Use '%(program_name)s --copyright' to see author and copyright details.
  5064. Use '%(program_name)s [-W|-s|-S] --help-behaviors' for help on specific checks.
  5065. %(program_name)s is distributed as part of the "demjson" Python module.
  5066. See %(homepage)s
  5067. """
  5068. SUCCESS_FAIL = 'E'
  5069. SUCCESS_WARNING = 'W'
  5070. SUCCESS_OK = 'OK'
  5071. def __init__(self, program_name='jsonlint', stdin=None, stdout=None, stderr=None ):
  5072. """Create an instance of a "jsonlint" program.
  5073. You can optionally pass options to define the program's environment:
  5074. * program_name - the name of the program, usually sys.argv[0]
  5075. * stdin - the file object to use for input, default sys.stdin
  5076. * stdout - the file object to use for outut, default sys.stdout
  5077. * stderr - the file object to use for error output, default sys.stderr
  5078. After creating an instance, you typically call the main() method.
  5079. """
  5080. import os, sys
  5081. self.program_path = program_name
  5082. self.program_name = os.path.basename(program_name)
  5083. if stdin:
  5084. self.stdin = stdin
  5085. else:
  5086. self.stdin = sys.stdin
  5087. if stdout:
  5088. self.stdout = stdout
  5089. else:
  5090. self.stdout = sys.stdout
  5091. if stderr:
  5092. self.stderr = stderr
  5093. else:
  5094. self.stderr = sys.stderr
  5095. @property
  5096. def usage(self):
  5097. """A multi-line string containing the program usage instructions.
  5098. """
  5099. sorthelp = '\n'.join([
  5100. " %12s - %s" % (sm, sd)
  5101. for sm, sd in sorted(sorting_methods.items()) if sm != SORT_NONE ])
  5102. return self._jsonlint_usage % {'program_name':self.program_name,
  5103. 'homepage':__homepage__,
  5104. 'sort_options_help': sorthelp }
  5105. def _lintcheck_data( self,
  5106. jsondata,
  5107. verbose_fp=None,
  5108. reformat=False,
  5109. show_stats=False,
  5110. input_encoding=None, output_encoding=None, escape_unicode=True,
  5111. pfx='',
  5112. jsonopts=None ):
  5113. global decode, encode
  5114. success = self.SUCCESS_FAIL
  5115. reformatted = None
  5116. if show_stats:
  5117. stats_fp = verbose_fp
  5118. else:
  5119. stats_fp = None
  5120. try:
  5121. results = decode( jsondata, encoding=input_encoding,
  5122. return_errors=True,
  5123. return_stats=True,
  5124. write_errors=verbose_fp,
  5125. write_stats=stats_fp,
  5126. filename_for_errors=pfx,
  5127. json_options=jsonopts )
  5128. except JSONError, err:
  5129. success = self.SUCCESS_FAIL
  5130. if verbose_fp:
  5131. verbose_fp.write('%s%s\n' % (pfx, err.pretty_description()) )
  5132. except Exception, err:
  5133. success = self.SUCCESS_FAIL
  5134. if verbose_fp:
  5135. verbose_fp.write('%s%s\n' % (pfx, str(err) ))
  5136. else:
  5137. errors = [err for err in results.errors if err.severity in ('fatal','error')]
  5138. warnings = [err for err in results.errors if err.severity in ('warning',)]
  5139. if errors:
  5140. success = self.SUCCESS_FAIL
  5141. elif warnings:
  5142. success = self.SUCCESS_WARNING
  5143. else:
  5144. success = self.SUCCESS_OK
  5145. if reformat:
  5146. encopts = jsonopts.copy()
  5147. encopts.strictness = STRICTNESS_TOLERANT
  5148. if reformat == 'compactly':
  5149. encopts.encode_compactly = True
  5150. else:
  5151. encopts.encode_compactly = False
  5152. reformatted = encode(results.object, encoding=output_encoding, json_options=encopts)
  5153. return (success, reformatted)
  5154. def _lintcheck( self, filename, output_filename,
  5155. verbose=False,
  5156. reformat=False,
  5157. show_stats=False,
  5158. input_encoding=None, output_encoding=None, escape_unicode=True,
  5159. jsonopts=None ):
  5160. import sys
  5161. verbose_fp = None
  5162. if not filename or filename == "-":
  5163. pfx = '<stdin>: '
  5164. jsondata = self.stdin.read()
  5165. if verbose:
  5166. verbose_fp = self.stderr
  5167. else:
  5168. pfx = '%s: ' % filename
  5169. try:
  5170. fp = open( filename, 'rb' )
  5171. jsondata = fp.read()
  5172. fp.close()
  5173. except IOError, err:
  5174. self.stderr.write('%s: %s\n' % (pfx, str(err)) )
  5175. return self.SUCCESS_FAIL
  5176. if verbose:
  5177. verbose_fp = self.stdout
  5178. success, reformatted = self._lintcheck_data(
  5179. jsondata,
  5180. verbose_fp=verbose_fp,
  5181. reformat=reformat,
  5182. show_stats=show_stats,
  5183. input_encoding=input_encoding, output_encoding=output_encoding,
  5184. pfx=pfx,
  5185. jsonopts=jsonopts )
  5186. if success != self.SUCCESS_FAIL and reformat:
  5187. if output_filename:
  5188. try:
  5189. fp = open( output_filename, 'wb' )
  5190. fp.write( reformatted )
  5191. except IOError, err:
  5192. self.stderr.write('%s: %s\n' % (pfx, str(err)) )
  5193. success = False
  5194. else:
  5195. if hasattr(sys.stdout,'buffer'): # To write binary data rather than strings
  5196. self.stdout.buffer.write( reformatted )
  5197. else:
  5198. self.stdout.write( reformatted )
  5199. elif success == self.SUCCESS_OK and verbose_fp:
  5200. verbose_fp.write('%sok\n' % pfx)
  5201. elif success == self.SUCCESS_WARNING and verbose_fp:
  5202. verbose_fp.write('%sok, with warnings\n' % pfx)
  5203. elif verbose_fp:
  5204. verbose_fp.write("%shas errors\n" % pfx)
  5205. return success
  5206. def main( self, argv ):
  5207. """The main routine for program "jsonlint".
  5208. Should be called with sys.argv[1:] as its sole argument.
  5209. Note sys.argv[0] which normally contains the program name
  5210. should not be passed to main(); instead this class itself
  5211. is initialized with sys.argv[0].
  5212. Use "--help" for usage syntax, or consult the 'usage' member.
  5213. """
  5214. import sys, os, getopt, unicodedata
  5215. recursion_limit = None
  5216. success = True
  5217. verbose = 'auto' # one of 'auto', True, or False
  5218. reformat = False
  5219. show_stats = False
  5220. output_filename = None
  5221. input_encoding = None
  5222. output_encoding = 'utf-8'
  5223. kwoptions = { # Will be used to initialize json_options
  5224. "sort_keys": SORT_SMART,
  5225. "strict": STRICTNESS_WARN,
  5226. "keep_format": True,
  5227. "decimal_context": 100,
  5228. }
  5229. try:
  5230. opts, args = getopt.getopt( argv,
  5231. 'vqfFe:o:sSW',
  5232. ['verbose','quiet',
  5233. 'format','format-compactly',
  5234. 'stats',
  5235. 'output',
  5236. 'strict','nonstrict','warn',
  5237. 'html-safe','xml-safe',
  5238. 'encoding=',
  5239. 'input-encoding=','output-encoding=',
  5240. 'sort=',
  5241. 'recursion-limit=',
  5242. 'leading-zero-radix=',
  5243. 'keep-format',
  5244. 'no-keep-format',
  5245. 'indent=',
  5246. 'indent-amount=',
  5247. 'indent-limit=',
  5248. 'indent-tab-width=',
  5249. 'max-items-per-line=',
  5250. 'allow=', 'warn=', 'forbid=', 'deny=',
  5251. 'help', 'help-behaviors',
  5252. 'version','copyright'] )
  5253. except getopt.GetoptError, err:
  5254. self.stderr.write( "Error: %s. Use \"%s --help\" for usage information.\n" \
  5255. % (err.msg, self.program_name) )
  5256. return 1
  5257. # Set verbose before looking at any other options
  5258. for opt, val in opts:
  5259. if opt in ('-v', '--verbose'):
  5260. verbose=True
  5261. # Process all options
  5262. for opt, val in opts:
  5263. if opt in ('-h', '--help'):
  5264. self.stdout.write( self.usage )
  5265. return 0
  5266. elif opt == '--help-behaviors':
  5267. self.stdout.write("""
  5268. BEHAVIOR OPTIONS:
  5269. These set of options let you control which checks are to be performed.
  5270. They may be turned on or off by listing them as arguments to one of
  5271. the options --allow, --warn, or --forbid ; for example:
  5272. %(program_name)s --allow comments,hex-numbers --forbid duplicate-keys
  5273. """ % {"program_name":self.program_name})
  5274. self.stdout.write("The default shown is for %s mode\n\n" % kwoptions['strict'])
  5275. self.stdout.write('%-7s %-25s %s\n' % ("Default", "Behavior_name", "Description"))
  5276. self.stdout.write('-'*7 + ' ' + '-'*25 + ' ' + '-'*50 + '\n')
  5277. j = json_options( **kwoptions )
  5278. for behavior in sorted(j.all_behaviors):
  5279. v = j.get_behavior( behavior )
  5280. desc = j.describe_behavior( behavior )
  5281. self.stdout.write('%-7s %-25s %s\n' % (v.lower(), behavior.replace('_','-'), desc))
  5282. return 0
  5283. elif opt == '--version':
  5284. self.stdout.write( '%s (%s) version %s (%s)\n' \
  5285. % (self.program_name, __name__, __version__, __date__) )
  5286. if verbose == True:
  5287. self.stdout.write( 'demjson from %r\n' % (__file__,) )
  5288. if verbose == True:
  5289. self.stdout.write( 'Python version: %s\n' % (sys.version.replace('\n',' '),) )
  5290. self.stdout.write( 'This python implementation supports:\n' )
  5291. self.stdout.write( ' * Max unicode: U+%X\n' % (sys.maxunicode,) )
  5292. self.stdout.write( ' * Unicode version: %s\n' % (unicodedata.unidata_version,) )
  5293. self.stdout.write( ' * Floating-point significant digits: %d\n' % (float_sigdigits,) )
  5294. self.stdout.write( ' * Floating-point max 10^exponent: %d\n' % (float_maxexp,) )
  5295. if str(0.0)==str(-0.0):
  5296. szero = 'No'
  5297. else:
  5298. szero = 'Yes'
  5299. self.stdout.write( ' * Floating-point has signed-zeros: %s\n' % (szero,) )
  5300. if decimal:
  5301. has_dec = 'Yes'
  5302. else:
  5303. has_dec = 'No'
  5304. self.stdout.write( ' * Decimal (bigfloat) support: %s\n' % (has_dec,) )
  5305. return 0
  5306. elif opt == '--copyright':
  5307. self.stdout.write( "%s is distributed as part of the \"demjson\" python package.\n" \
  5308. % (self.program_name,) )
  5309. self.stdout.write( "See %s\n\n\n" % (__homepage__,) )
  5310. self.stdout.write( __credits__ )
  5311. return 0
  5312. elif opt in ('-v', '--verbose'):
  5313. verbose = True
  5314. elif opt in ('-q', '--quiet'):
  5315. verbose = False
  5316. elif opt in ('-s', '--strict'):
  5317. kwoptions['strict'] = STRICTNESS_STRICT
  5318. kwoptions['keep_format'] = False
  5319. elif opt in ('-S', '--nonstrict'):
  5320. kwoptions['strict'] = STRICTNESS_TOLERANT
  5321. elif opt in ('-W', '--tolerant'):
  5322. kwoptions['strict'] = STRICTNESS_WARN
  5323. elif opt in ('-f', '--format'):
  5324. reformat = True
  5325. kwoptions['encode_compactly'] = False
  5326. elif opt in ('-F', '--format-compactly'):
  5327. kwoptions['encode_compactly'] = True
  5328. reformat = 'compactly'
  5329. elif opt in ('--stats',):
  5330. show_stats=True
  5331. elif opt in ('-o', '--output'):
  5332. output_filename = val
  5333. elif opt in ('-e','--encoding'):
  5334. input_encoding = val
  5335. output_encoding = val
  5336. escape_unicode = False
  5337. elif opt in ('--output-encoding'):
  5338. output_encoding = val
  5339. escape_unicode = False
  5340. elif opt in ('--input-encoding'):
  5341. input_encoding = val
  5342. elif opt in ('--html-safe','--xml-safe'):
  5343. kwoptions['html_safe'] = True
  5344. elif opt in ('--allow','--warn','--forbid'):
  5345. action = opt[2:]
  5346. if action in kwoptions:
  5347. kwoptions[action] += "," + val
  5348. else:
  5349. kwoptions[action] = val
  5350. elif opt in ('--keep-format',):
  5351. kwoptions['keep_format']=True
  5352. elif opt in ('--no-keep-format',):
  5353. kwoptions['keep_format']=False
  5354. elif opt == '--leading-zero-radix':
  5355. kwoptions['leading_zero_radix'] = val
  5356. elif opt in ('--indent', '--indent-amount'):
  5357. if val in ('tab','tabs'):
  5358. kwoptions['indent_amount'] = 8
  5359. kwoptions['indent_tab_width'] = 8
  5360. else:
  5361. try:
  5362. kwoptions['indent_amount'] = int(val)
  5363. except ValueError:
  5364. self.stderr.write("Indentation amount must be a number\n")
  5365. return 1
  5366. elif opt == 'indent-tab-width':
  5367. try:
  5368. kwoptions['indent_tab_width'] = int(val)
  5369. except ValueError:
  5370. self.stderr.write("Indentation tab width must be a number\n")
  5371. return 1
  5372. elif opt == '--max-items-per-line':
  5373. try:
  5374. kwoptions['max_items_per_line'] = int(val)
  5375. except ValueError:
  5376. self.stderr.write("Max items per line must be a number\n")
  5377. return 1
  5378. elif opt == '--sort':
  5379. val = val.lower()
  5380. if val == 'alpha':
  5381. kwoptions['sort_keys'] = SORT_ALPHA
  5382. elif val == 'alpha_ci':
  5383. kwoptions['sort_keys'] = SORT_ALPHA_CI
  5384. elif val == 'preserve':
  5385. kwoptions['sort_keys'] = SORT_PRESERVE
  5386. else:
  5387. kwoptions['sort_keys'] = SORT_SMART
  5388. elif opt == '--recursion-limit':
  5389. try:
  5390. recursion_limit = int(val)
  5391. except ValueError:
  5392. self.stderr.write("Recursion limit must be a number: %r\n" % val)
  5393. return 1
  5394. else:
  5395. max_limit = 100000
  5396. old_limit = sys.getrecursionlimit()
  5397. if recursion_limit > max_limit:
  5398. self.stderr.write("Recursion limit must be a number between %d and %d\n" % (old_limit,max_limit))
  5399. return 1
  5400. elif recursion_limit > old_limit:
  5401. sys.setrecursionlimit( recursion_limit )
  5402. else:
  5403. self.stderr.write('Unknown option %r\n' % opt)
  5404. return 1
  5405. # Make the JSON options
  5406. kwoptions['decimal_context'] = 100
  5407. jsonopts = json_options( **kwoptions )
  5408. # Now decode each file...
  5409. if not args:
  5410. args = [None]
  5411. for fn in args:
  5412. try:
  5413. rc = self._lintcheck( fn, output_filename=output_filename,
  5414. verbose=verbose,
  5415. reformat=reformat,
  5416. show_stats=show_stats,
  5417. input_encoding=input_encoding,
  5418. output_encoding=output_encoding,
  5419. jsonopts=jsonopts )
  5420. if rc != self.SUCCESS_OK:
  5421. # Warnings or errors should result in failure. If
  5422. # checking multiple files, do not change a
  5423. # previous error back to ok.
  5424. success = False
  5425. except KeyboardInterrupt, err:
  5426. sys.stderr.write("\njsonlint interrupted!\n")
  5427. sys.exit(1)
  5428. if not success:
  5429. return 1
  5430. return 0
  5431. # end file